In [1]:
euri_api_key = "euri-a1d04833d1748505045e19d2ad09a30e2b0b1b8877b4178a3b19187dc7e275b2"

In [1]:
import os, json, time, sqlite3, warnings
from typing import Dict, Any, List
import re
import easyocr
import fitz # PyMuPDF
# from euriai.langgraph import EuriaiLangGraph

In [17]:
from euriai.langgraph import EuriaiLangGraph

In [2]:
EURI_API_KEY  = "euri-a1d04833d1748505045e19d2ad09a30e2b0b1b8877b4178a3b19187dc7e275b2"
MODEL         = "gpt-4.1-nano"

In [3]:
INPUT_DIR     = "invoices"
DB_PATH       = "invoice.sqlite" 
PROCESSED_LOG = "processed.json"
POLL_SEC      = 5

In [4]:
os.makedirs(INPUT_DIR, exist_ok=True)

In [5]:
def load_seen() -> set:
    if not os.path.exists(PROCESSED_LOG):
        return set()
    try:
        with open(PROCESSED_LOG, "r", encoding="utf-8") as f:
            return set(json.load(f))
    except Exception:
        return set()


In [6]:
def save_seen(seen: set) -> None:
    with open(PROCESSED_LOG, "w", encoding="utf-8") as f:
        json.dump(sorted(list(seen)), f, ensure_ascii=False, indent=2)
        
seen = load_seen() 

In [7]:
def ensure_schema():
    con = sqlite3.connect(DB_PATH)
    cur = con.cursor()
    # Create if missing
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS invoices (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          file_name TEXT,
          vendor TEXT,
          number TEXT,
          date TEXT,
          total REAL,
          currency TEXT,
          raw_json TEXT
        )
        """
    )
    # Add columns if table existed with an older schema
    cur.execute("PRAGMA table_info(invoices);")
    cols = {row[1] for row in cur.fetchall()}
    if "file_name" not in cols:
        cur.execute("ALTER TABLE invoices ADD COLUMN file_name TEXT;")
    if "raw_json" not in cols:
        cur.execute("ALTER TABLE invoices ADD COLUMN raw_json TEXT;")
    con.commit()
    con.close()

ensure_schema()


In [8]:
ocr = easyocr.Reader(["en"], gpu=False)

Using CPU. Note: This module is much faster with a GPU.


In [18]:
clean_graph = EuriaiLangGraph(api_key=EURI_API_KEY, default_model=MODEL)
clean_graph.add_ai_node(
    "CLEAN",
    """You clean noisy OCR to plain text.
- Keep facts.
- No guessing.
- Keep table rows readable.

OCR:
{ocr_text}"""
)
clean_graph.set_entry_point("CLEAN")
clean_graph.set_finish_point("CLEAN")


Added AI node: CLEAN (model: gpt-4.1-nano)
Set entry point: CLEAN
Set finish point: CLEAN


In [19]:
extract_graph = EuriaiLangGraph(api_key=EURI_API_KEY, default_model=MODEL)
extract_graph.add_ai_node(
    "EXTRACT",
    """From CLEAN_TEXT, return STRICT JSON with keys exactly:
vendor, number, date, total, currency,
line_items (list of {{description, quantity, unit_price, amount}}).

Unknown → null. Numbers numeric. Dates YYYY-MM-DD if possible.
JSON ONLY, no extra text.

CLEAN_TEXT:
{clean_text}"""
)
extract_graph.set_entry_point("EXTRACT")
extract_graph.set_finish_point("EXTRACT")

Added AI node: EXTRACT (model: gpt-4.1-nano)
Set entry point: EXTRACT
Set finish point: EXTRACT


In [20]:
def pick_text(x, *, prefer_key=None):
    """Return a plain string from various possible structures.
    If x is a dict, try prefer_key or common keys; else stringify.
    """
    if isinstance(x, str):
        return x
    if isinstance(x, dict):
        if prefer_key and prefer_key in x and isinstance(x[prefer_key], str):
            return x[prefer_key]
        for k in ("output", "text", "CLEAN_output", "EXTRACT_output"):
            if k in x and isinstance(x[k], str):
                return x[k]
        return json.dumps(x, ensure_ascii=False)
    return str(x)


In [21]:
def parse_json_safe(raw):
    """Parse JSON robustly. Accepts dict or str; falls back to substring."""
    if isinstance(raw, dict):
        return raw
    if not isinstance(raw, str):
        return {"__raw__": raw}
    try:
        return json.loads(raw)
    except Exception:
        pass
    try:
        s, e = raw.find("{"), raw.rfind("}")
        if s != -1 and e != -1 and e > s:
            return json.loads(raw[s:e+1])
    except Exception:
        pass
    return {"__raw__": raw}

In [22]:
def _heuristic_extract(clean_text: str) -> dict:
    """Very simple regex-based extractor to keep DB flowing when AI is down."""
    def find(pat, s):
        m = re.search(pat, s, re.IGNORECASE)
        return m.group(1).strip() if m else None

    vendor = find(r"Vendor:\s*(.+)", clean_text)
    number = find(r"(?:Invoice Number|Invoice No\.?):\s*([A-Za-z0-9\-]+)", clean_text)
    date   = find(r"(?:Invoice Date|Date):\s*([0-9]{4}-[0-9]{2}-[0-9]{2})", clean_text)
    total  = find(r"Total:\s*([0-9]+(?:\.[0-9]+)?)", clean_text)
    curr   = find(r"Total:\s*[0-9]+(?:\.[0-9]+)?\s*([A-Za-z]{3})", clean_text) or find(r"Currency:\s*([A-Za-z]{3})", clean_text)

    try: total = float(total) if total is not None else None
    except: total = None

    return {
        "vendor": vendor,
        "number": number,
        "date":   date,
        "total":  total,
        "currency": curr,
        "line_items": []
    }


In [9]:
def NODE_OCR(file_path: str) -> Dict[str, Any]:
    """Read an image or PDF file and return OCR text."""
    file_ext = os.path.splitext(file_path)[1].lower()
    
    if file_ext in [".png", ".jpg", ".jpeg"]:
        # Existing image OCR logic
        text = "\n".join(ocr.readtext(file_path, detail=0))
        return {"ocr_text": text}
    
    elif file_ext == ".pdf":
        # PDF processing logic
        doc = fitz.open(file_path)
        full_text = []
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            # First, try to extract text directly
            page_text = page.get_text()
            if page_text.strip():
                full_text.append(page_text)
            else:
                # If no text, assume it's an image-based PDF and use OCR
                pix = page.get_pixmap()
                img_bytes = pix.tobytes("png")
                # Use easyocr on the image bytes
                ocr_results = ocr.readtext(img_bytes, detail=0)
                full_text.append("\n".join(ocr_results))
        
        doc.close()
        return {"ocr_text": "\n".join(full_text)}
        
    else:
        # Unsupported file type
        warnings.warn(f"Unsupported file type: {file_ext}")
        return {"ocr_text": ""}

In [10]:
a = NODE_OCR('content.pdf')

In [13]:
with open('output.txt', 'w', encoding='utf-8') as f:
    for i in a['ocr_text'].split('\n'):
        print(i)
        f.write(i + '\n')

7M
MHz Crystal 
Rev. 1.09
Page 1
www.txccrystal.com
16 ~ 20  MHz
60Ω  Max.
20 ~ 64  MHz
50Ω  Max.
80Ω  Max.
12 ~ 16  MHz
80Ω  Max.
80 ~ 114  MHz
60Ω  Max.
10 ~ 12  MHz
150Ω  Max.
60 ~ 80  MHz
Notes:
[1] Please contact us for low aging +/-1ppm grade crystals.
Motional Resistance (ESR)
Fundamental
3rd Overtone
Aging (at 25°C ± 3°C, first year)
Faging
 ±3ppm Max.
Note [1]
Storage Temperature Range
TSTR
-40°C ~ +85°C
Drive Level
DL
1 ~ 200μW (100μW Typ.)
Load Capacitance
CL
6pF, 7pF, 8pF, 9pF, 10pF
12pF, 16pF, 18pF, 20pF
-40°C ~ +85°C
-40°C ~ +125°C
Shunt Capacitance
C0
3pF Max.
-40°C ~ +85°C
±20 ppm
±15 ppm
±60 ppm
-40°C ~ +125°C
Operating Temperature Range
TOTR
-20°C ~ +70°C
Frequency Stability over
Operating Temperature Range
(refer to 25°C)
Fstab
±30 ppm
-20°C ~ +70°C
±20 ppm
±10 ppm
±10 ppm
-30°C ~ +85°C
±30 ppm
Frequency Tolerance
Ftol
±30ppm, ±20ppm, ±10ppm
at 25°C
Electrical Characteristics
Item
7M
Conditions
Frequency Range
F0
10MHz ~ 114MHz

RoHS compliant & Pb free

External d

In [24]:
b = NODE_CLEAN(a)

[1m[values][0m {'ocr_text': {'ocr_text': '7M\nMHz Crystal \nRev. 1.09\nPage 1\nwww.txccrystal.com\n16 ~ 20  MHz\n60Ω  Max.\n20 ~ 64  MHz\n50Ω  Max.\n80Ω  Max.\n12 ~ 16  MHz\n80Ω  Max.\n80 ~ 114  MHz\n60Ω  Max.\n10 ~ 12  MHz\n150Ω  Max.\n60 ~ 80  MHz\nNotes:\n[1] Please contact us for low aging +/-1ppm grade crystals.\nMotional Resistance (ESR)\nFundamental\n3rd Overtone\nAging (at 25°C ± 3°C, first year)\nFaging\n ±3ppm Max.\nNote [1]\nStorage Temperature Range\nTSTR\n-40°C ~ +85°C\nDrive Level\nDL\n1 ~ 200μW (100μW Typ.)\nLoad Capacitance\nCL\n6pF, 7pF, 8pF, 9pF, 10pF\n12pF, 16pF, 18pF, 20pF\n-40°C ~ +85°C\n-40°C ~ +125°C\nShunt Capacitance\nC0\n3pF Max.\n-40°C ~ +85°C\n±20 ppm\n±15 ppm\n±60 ppm\n-40°C ~ +125°C\nOperating Temperature Range\nTOTR\n-20°C ~ +70°C\nFrequency Stability over\nOperating Temperature Range\n(refer to 25°C)\nFstab\n±30 ppm\n-20°C ~ +70°C\n±20 ppm\n±10 ppm\n±10 ppm\n-30°C ~ +85°C\n±30 ppm\nFrequency Tolerance\nFtol\n±30ppm, ±20ppm, ±10ppm\nat 25°C\nElectrical 

In [None]:
c = NODE_EXTRACT(b)

Graph compiled successfully
[1m[values][0m {'clean_text': '{"clean_text": "Cleaned and Fact-Checked Plain Text Version:\\n\\n---\\n\\n**Product: 7M MHz Crystal Rev. 1.09**  \\nWebsite: www.txccrystal.com\\n\\n**Frequency Range:** 10 MHz to 114 MHz\\n\\n**Electrical Characteristics:**\\n\\n| Item                        | Conditions                                | Details                                              |\\n|------------------------------|-------------------------------------------|------------------------------------------------------|\\n| Frequency Range (F0)         | -                                         | 10 MHz to 114 MHz                                   |\\n| RoHS Compliance              | -                                         | RoHS compliant & Pb free                            |\\n| External Dimensions (mm)     | -                                         | Length: 3.2 mm, Width: 2.5 mm, Height: 0.7 mm       |\\n| Applications                 | -        

: 

In [14]:
def NODE_CLEAN(ocr_text: str) -> Dict[str, Any]:
    """Normalize noisy OCR text using the CLEAN AI node; fallback to pass-through on error."""
    try:
        clean_raw = clean_graph.run({"ocr_text": ocr_text})
        clean_text = pick_text(clean_raw, prefer_key="CLEAN_output")
        if not isinstance(clean_text, str) or not clean_text.strip():
            raise RuntimeError("Empty CLEAN output")
        return {"clean_text": clean_text, "CLEAN_raw": clean_raw}
    except Exception as e:
        print(f"[CLEAN:FALLBACK] {e}")
        return {"clean_text": ocr_text, "CLEAN_raw": {"fallback": True}}

In [15]:
def NODE_EXTRACT(clean_text_any) -> dict:
    """Extract structured JSON using the EXTRACT AI node; fallback to heuristic on error."""
    clean_text = pick_text(clean_text_any, prefer_key="CLEAN_output")
    try:
        result = extract_graph.run({"clean_text": clean_text})
        raw_json = pick_text(result, prefer_key="EXTRACT_output")
        return {"raw_json": raw_json, "EXTRACT_raw": result}
    except Exception as e:
        print(f"[EXTRACT:FALLBACK] {e}")
        heuristic = _heuristic_extract(clean_text)
        return {"raw_json": json.dumps(heuristic, ensure_ascii=False), "EXTRACT_raw": {"fallback": True}}


In [20]:
def NODE_VALIDATE(data: Dict[str, Any]) -> Dict[str, Any]:
    """Minimal schema checks and numeric sanity for demo purposes."""
    issues: List[str] = []
    for k in ["vendor", "number", "date", "currency"]:
        if k not in data or data.get(k) in (None, ""):
            issues.append(f"missing key: {k}")
    try:
        if data.get("total") is None:
            issues.append("total is null")
        else:
            float(data.get("total"))
    except Exception:
        issues.append(f"total not numeric: {data.get('total')}")
    if not isinstance(data.get("line_items", []), list):
        issues.append("line_items not a list")
    return {"valid": len(issues) == 0, "issues": issues}


In [21]:
def NODE_PERSIST(file_name: str, data: dict, raw_json_any):
    raw_json_str = json.dumps(raw_json_any, ensure_ascii=False) if not isinstance(raw_json_any, str) else raw_json_any
    con = sqlite3.connect(DB_PATH)
    cur = con.cursor()
    cur.execute(
        """
        INSERT INTO invoices(file_name, vendor, number, date, total, currency, raw_json)
        VALUES (?, ?, ?, ?, ?, ?, ?)
        """,
        (
            file_name,
            data.get("vendor"),
            data.get("number"),
            data.get("date"),
            data.get("total"),
            data.get("currency"),
            raw_json_str,
        ),
    )
    rowid = cur.lastrowid
    con.commit()
    con.close()
    print(f"[DB] Inserted row id={rowid} at {os.path.abspath(DB_PATH)}")
    return {"db": "sqlite", "rowid": rowid}

In [22]:
def NODE_NOTIFY(file_name: str, data: Dict[str, Any], valid: bool, issues: List[str]) -> None:
    status = "OK" if valid else f"WARN: {issues}"
    print(
        f"[{status}] file='{file_name}' vendor={data.get('vendor')} number={data.get('number')} total={data.get('total')} {data.get('currency')}"
    )

In [23]:
NODES = ["WATCH", "OCR", "CLEAN", "EXTRACT", "VALIDATE", "PERSIST", "NOTIFY"]
EDGES = [
    ("WATCH", "OCR"),
    ("OCR", "CLEAN"),
    ("CLEAN", "EXTRACT"),
    ("EXTRACT", "VALIDATE"),
    ("VALIDATE", "PERSIST"),
    ("PERSIST", "NOTIFY"),
]

In [24]:
def run_pipeline_for_file(file_path: str) -> None:
    file_name = os.path.basename(file_path)

    # OCR → CLEAN → EXTRACT
    ocr_out     = NODE_OCR(file_path)
    clean_out   = NODE_CLEAN(pick_text(ocr_out.get("ocr_text")))
    extract_out = NODE_EXTRACT(clean_out.get("clean_text"))

    # Parse JSON → VALIDATE
    raw_json = extract_out["raw_json"]
    data     = parse_json_safe(raw_json)
    val_out  = NODE_VALIDATE(data)

    # Persist + Notify
    NODE_PERSIST(file_name, data, raw_json)
    NODE_NOTIFY(file_name, data, val_out["valid"], val_out["issues"])


In [None]:
print(f"\nWatching '{INPUT_DIR}' every {POLL_SEC}s. Only NEW .png/.jpg/.pdf will be processed.")
try:
    while True:
        for fname in sorted(os.listdir(INPUT_DIR)):
            if not fname.lower().endswith((".png", ".jpg", ".jpeg", ".pdf")):
                continue
            if fname in seen:
                continue  # already processed (persists across restarts)
            fpath = os.path.join(INPUT_DIR, fname)
            try:
                run_pipeline_for_file(fpath)          # trigger full pipeline
                seen.add(fname); save_seen(seen)      # mark as processed
            except Exception as e:
                print(f"[ERROR] {fname}: {e}")
        time.sleep(POLL_SEC)
except KeyboardInterrupt:
    print("Stopped watching.")


Watching 'invoices' every 5s. Only NEW .png/.jpg/.pdf will be processed.




Graph compiled successfully
[1m[values][0m {'ocr_text': '8\nL\'\n9\n\n130\n3\n8\n8\n3\n1\n6\n8\n8\n3\n63\n5\n1\n1\n1\n1\n1\n0\n23\n1\n1\n1\n1\n3\n1\n[\n1\n2\n9\n4\n1\n2\n8\n1\nL\n[\n2\nI\n{\n8\n1\n2\n8\nI\n{\n8\n2\nH\n5\nIV\n3\nJ\n\n3\n3\nE\n8\n1\n1\n2\n4\n3\n!\n8\n1\n4\n0\nNI \n9\n8\nWle\n2\n1\n6\nHplh\n8\n2\n7\n8\ng\nIlwh\ng ?\n[\n0\n0\n3\n8\ng\n2\nJ\n3\n8\n{\n1\n5\n1\n0\n1\n2\n8\n1\n0\n1\n8\nL\n#\n8\n2\n6\n1\n3\n8\n2l2g\n8\n1\n8\n0\n8\nO\no\n0\n5\n3\ng\n2\n7\nOBGYN VITALS MONITORING AND\n8 children\nNUTRITIONAL SCREENING FOR OUTPATIENTS\nVITALS MONITORING CHART\nNAME:\nAge:\n249k_\nUo $\nAtankyha\nmiSrq\nHeight:\nT62y\nQ@64<0)\nLMP:\nPatient:\nEDD:\nHusband:\nWEIGHT\nBMI\nNURSE\nBLOOD PRESSURE\nPULSE\nSPOz\nTEMP\nINITIALS\n(mmHg)\n(b/mts)\n(%)\n(*F)\n(Kg)\n(Kglm})\nI4ol &0\nGsbl\n9L6F\n64K4\n44/35\nEb4L\nJof:\n983/Azak\n94,3S\nL36L0\nR4bIK\n4LL\n9$2*E\nL4Lua\naLp-6\nLuiky EHwl\nHo\n@ckn\n3l4k\n44/a\n#8l4L\n0&f.\n927\nshlt-W\n#6/ ~\n9 9+\n91.5\n6p\n664K9\n35.30\nLR ,\n9900421492\nP



[1m[values][0m {'ocr_text': '8\nL\'\n9\n\n130\n3\n8\n8\n3\n1\n6\n8\n8\n3\n63\n5\n1\n1\n1\n1\n1\n0\n23\n1\n1\n1\n1\n3\n1\n[\n1\n2\n9\n4\n1\n2\n8\n1\nL\n[\n2\nI\n{\n8\n1\n2\n8\nI\n{\n8\n2\nH\n5\nIV\n3\nJ\n\n3\n3\nE\n8\n1\n1\n2\n4\n3\n!\n8\n1\n4\n0\nNI \n9\n8\nWle\n2\n1\n6\nHplh\n8\n2\n7\n8\ng\nIlwh\ng ?\n[\n0\n0\n3\n8\ng\n2\nJ\n3\n8\n{\n1\n5\n1\n0\n1\n2\n8\n1\n0\n1\n8\nL\n#\n8\n2\n6\n1\n3\n8\n2l2g\n8\n1\n8\n0\n8\nO\no\n0\n5\n3\ng\n2\n7\nOBGYN VITALS MONITORING AND\n8 children\nNUTRITIONAL SCREENING FOR OUTPATIENTS\nVITALS MONITORING CHART\nNAME:\nAge:\n249k_\nUo $\nAtankyha\nmiSrq\nHeight:\nT62y\nQ@64<0)\nLMP:\nPatient:\nEDD:\nHusband:\nWEIGHT\nBMI\nNURSE\nBLOOD PRESSURE\nPULSE\nSPOz\nTEMP\nINITIALS\n(mmHg)\n(b/mts)\n(%)\n(*F)\n(Kg)\n(Kglm})\nI4ol &0\nGsbl\n9L6F\n64K4\n44/35\nEb4L\nJof:\n983/Azak\n94,3S\nL36L0\nR4bIK\n4LL\n9$2*E\nL4Lua\naLp-6\nLuiky EHwl\nHo\n@ckn\n3l4k\n44/a\n#8l4L\n0&f.\n927\nshlt-W\n#6/ ~\n9 9+\n91.5\n6p\n664K9\n35.30\nLR ,\n9900421492\nPage 1 of 2\nAWFTOBGIDOC/MR/O



[1m[values][0m {'ocr_text': 'Vendor: Tech Solutions Ltd.\nInvoice Number: INV-1033\nInvoice Date: 2025-09-27\nBilling Address: 123 Example Street, City\nBilling Email:\n@example.com\nSubtotal: 587.14 INR\n105.69 INR\nTotal: 692.8299999999999 INR\nDue Date: 2025-10-13\nPO Number: PO-2033\nNotes: Payment due within 30 days_\nbilling\nTax:'}
[1m[updates][0m {'CLEAN': {'ocr_text': 'Vendor: Tech Solutions Ltd.\nInvoice Number: INV-1033\nInvoice Date: 2025-09-27\nBilling Address: 123 Example Street, City\nBilling Email:\n@example.com\nSubtotal: 587.14 INR\n105.69 INR\nTotal: 692.8299999999999 INR\nDue Date: 2025-10-13\nPO Number: PO-2033\nNotes: Payment due within 30 days_\nbilling\nTax:', 'CLEAN_output': 'Vendor: Tech Solutions Ltd.  \nInvoice Number: INV-1033  \nInvoice Date: 2025-09-27  \nBilling Address: 123 Example Street, City  \nBilling Email: @example.com  \nSubtotal: 587.14 INR  \nTax: 105.69 INR  \nTotal: 692.83 INR  \nDue Date: 2025-10-13  \nPO Number: PO-2033  \nNotes: Paymen



[1m[values][0m {'ocr_text': 'Vendor: Tech Solutions Ltd.\nInvoice Number: INV-1033\nInvoice Date: 2025-09-27\nBilling Address: 123 Example Street, City\nBilling Email:\n@example.com\nSubtotal: 587.14 INR\n105.69 INR\nTotal: 692.8299999999999 INR\nDue Date: 2025-10-13\nPO Number: PO-2033\nNotes: Payment due within 30 days_\nbilling\nTax:'}
[1m[updates][0m {'CLEAN': {'ocr_text': 'Vendor: Tech Solutions Ltd.\nInvoice Number: INV-1033\nInvoice Date: 2025-09-27\nBilling Address: 123 Example Street, City\nBilling Email:\n@example.com\nSubtotal: 587.14 INR\n105.69 INR\nTotal: 692.8299999999999 INR\nDue Date: 2025-10-13\nPO Number: PO-2033\nNotes: Payment due within 30 days_\nbilling\nTax:', 'CLEAN_output': 'Vendor: Tech Solutions Ltd.  \nInvoice Number: INV-1033  \nInvoice Date: 2025-09-27  \nBilling Address: 123 Example Street, City  \nBilling Email: @example.com  \nSubtotal: 587.14 INR  \nTax: 105.69 INR  \nTotal: 692.83 INR  \nDue Date: 2025-10-13  \nPO Number: PO-2033  \nNotes: Paymen



[1m[values][0m {'ocr_text': 'Vendor: Global Supplies Inc.\nInvoice Number: INV-1011\nInvoice Date: 2025-09-11\nBilling Address: 123 Example Street, City\nBilling Email: billing@example.com\nSubtotal: 556.92 INR\n100.25 INR\nTotal: 657.17 INR\nDue Date: 2025-10-22\nPO Number: PO-2011\nNotes: Payment due within 30 days_\nTax:'}
[1m[updates][0m {'CLEAN': {'ocr_text': 'Vendor: Global Supplies Inc.\nInvoice Number: INV-1011\nInvoice Date: 2025-09-11\nBilling Address: 123 Example Street, City\nBilling Email: billing@example.com\nSubtotal: 556.92 INR\n100.25 INR\nTotal: 657.17 INR\nDue Date: 2025-10-22\nPO Number: PO-2011\nNotes: Payment due within 30 days_\nTax:', 'CLEAN_output': 'Vendor: Global Supplies Inc.  \nInvoice Number: INV-1011  \nInvoice Date: 2025-09-11  \nBilling Address: 123 Example Street, City  \nBilling Email: billing@example.com  \nSubtotal: 556.92 INR  \nTax: 100.25 INR  \nTotal: 657.17 INR  \nDue Date: 2025-10-22  \nPO Number: PO-2011  \nNotes: Payment due within 30 da



[1m[values][0m {'ocr_text': 'Vendor: Tech Solutions Ltd.\nInvoice Number: INV-1012\nInvoice Date: 2025-09-18\nBilling Address: 123 Example Street, City\nEmail: billing@example.com\nSubtotal: 945.69 INR\n170.22 INR\nTotal: 1115.91 INR\nDue Date: 2025-10-10\nPO Number: PO-2012\nNotes: Payment due within 30 days_\nBilling\nTax:'}
[1m[updates][0m {'CLEAN': {'ocr_text': 'Vendor: Tech Solutions Ltd.\nInvoice Number: INV-1012\nInvoice Date: 2025-09-18\nBilling Address: 123 Example Street, City\nEmail: billing@example.com\nSubtotal: 945.69 INR\n170.22 INR\nTotal: 1115.91 INR\nDue Date: 2025-10-10\nPO Number: PO-2012\nNotes: Payment due within 30 days_\nBilling\nTax:', 'CLEAN_output': 'Vendor: Tech Solutions Ltd.  \nInvoice Number: INV-1012  \nInvoice Date: 2025-09-18  \nBilling Address: 123 Example Street, City  \nEmail: billing@example.com  \nSubtotal: 945.69 INR  \nTax: 170.22 INR  \nTotal: 1115.91 INR  \nDue Date: 2025-10-10  \nPO Number: PO-2012  \nNotes: Payment due within 30 days', '