In [1]:
from datetime import datetime
from pydantic import BaseModel, Field


# Schema for a single entry of an invoice. It is referenced by
# `Invoice.line_items` as the element type of that list.
# NOTE(review): the class docstring and the `Field(description=...)` strings
# are schema metadata, not plain comments -- presumably they are surfaced to
# the LLM through the model's JSON schema, so edit them deliberately.
class LineItem(BaseModel):
    """A line item in an invoice."""

    # Free-text name of the item; no default is declared.
    item_name: str = Field(description="The name of this item")
    # Price as a plain float; currency and units are not captured here.
    price: float = Field(description="The price of this item")


# Top-level extraction schema. Later in this notebook it is handed to
# `llm.as_structured_llm(Invoice)` so that completions are returned in this
# shape.
# NOTE(review): the class docstring and the `Field(description=...)` strings
# are schema metadata, not plain comments -- presumably they are surfaced to
# the LLM through the model's JSON schema, so edit them deliberately.
class Invoice(BaseModel):
    """A representation of information from an invoice."""

    # Identifier kept as a string (the description notes it is often a number).
    invoice_id: str = Field(
        description="A unique identifier for this invoice, often a number"
    )
    # Creation date, typed as a full `datetime` rather than a bare date.
    date: datetime = Field(description="The date this invoice was created")
    # Every item on the invoice, each following the `LineItem` schema.
    line_items: list[LineItem] = Field(
        description="A list of all the items in this invoice"
    )

In [2]:
from llama_index.readers.file import PDFReader
from pathlib import Path

# Path of the PDF to parse, relative to the notebook's working directory.
# Alternative input previously used here: Path("./uber_receipt.pdf")
PDF_PATH = Path("../agent/data/2023_canadian_budget.pdf")

pdf_reader = PDFReader()
documents = pdf_reader.load_data(file=PDF_PATH)

# Fail with a readable message instead of a bare IndexError on `documents[0]`
# when the reader returns nothing for this file.
if not documents:
    raise ValueError(f"No documents could be read from {PDF_PATH}")

# Only the first returned document is used as the text sent to the LLM.
text = documents[0].text

In [3]:
# Display the text taken from the first loaded document so it can be
# inspected before it is passed to the LLM.
text

'‹\xa02022 2024›\n2023 budget of the\nCanadian federal\ngovernment\nSubmitted 28 March 2023\nPresented 28 March 2023\nParliament 44th\nParty Liberal\nFinance\nminister\nChrystia Freeland\nTotal revenue $456.8 billion\n(projected)\nTotal\nexpenditures\n$496.9 billion\n(projected)\nDeﬁcit $40.1 billion\n(projected)[ 1 ] \nGDP TBA\nWebsite 2023 Budget (http\ns://www.budget.can\nada.ca/2023/home-\naccueil-en.html)\n2023 Canadian federal budget\nThe Canadian federal budget for the fiscal years of 2023–24 was\npresented to the House of Commons by Finance Minister Chrystia\nFreeland on 28 March 2023.[2] The budget was meant to reflect\nPrime Minister Justin Trudeau\'s stated policy objective to "make\nlife more affordable for Canadians"[3] while also reducing\ngovernment expenditures.[4]\nThe 2023 budget is the seventh budget document introduced in\nthe House of Commons under the premiership of Justin Trudeau.\nIt comes at the heel of the first anniversary of the Russian\ninvasion of Ukraine,

In [4]:
# Alternative backend, kept for reference only:
#   from llama_index.llms.openai import OpenAI
#   llm = OpenAI(model="gpt-4o")
from llama_index.llms.ollama import Ollama

# Ollama-served model with a generous request timeout
# (presumably seconds -- confirm against the Ollama integration docs).
llm = Ollama(
    model="qwen2.5:7b",
    request_timeout=720.0,
)

# Bind the Invoice schema to the LLM, then run the extracted PDF text
# through the resulting structured LLM.
sllm = llm.as_structured_llm(Invoice)
response = sllm.complete(text)

In [5]:
import json

# Parse the response text as JSON, then re-serialise it with indentation
# so the structure is easy to read in the cell output.
json_response = json.loads(response.text)
pretty_json = json.dumps(json_response, indent=2)
print(pretty_json)

{
  "invoice_id": "BC2023-01",
  "date": "2023-06-22T00:00:00Z",
  "line_items": [
    {
      "item_name": "Budget Presentation to the House of Commons",
      "price": 0.0
    },
    {
      "item_name": "Introduction of Net New Spending Over Six Years ($43B)",
      "price": 43000000000.0
    },
    {
      "item_name": "Green Technologies Tax Credit (15% Refundable, $20B)",
      "price": 20000000000.0
    },
    {
      "item_name": "Dental Care Program ($13B for Means-Tested Dental Coverage)",
      "price": 13000000000.0
    },
    {
      "item_name": "Grocery Rebate (Up to $467 for Families, Up to $234 for Singles Without Kids)",
      "price": 0.0
    },
    {
      "item_name": "Increased Canada Student Grants Funding (40% Increase in Funding)",
      "price": 0.0
    },
    {
      "item_name": "Spending Cuts ($15B Achieved by Defunding Public Services and Canceling Programs)",
      "price": -15000000000.0
    },
    {
      "item_name": "Stock Buyback Tax (2% on Stock Buy