## Use an LLM to parse a document or image into structured output
### - This example parses a sample driving license image into structured output
### - Uses LangChain to configure the agent and an OpenAI model as the LLM

### 1) Install Packages, Set the OpenAI API Key, and Import Packages

In [None]:
#!pip install -U langchain
#!pip install -U langchain-openai
#!pip install pydantic

In [None]:
import getpass
import os

# Keep a key that is already exported in the environment; otherwise prompt for
# it so the secret is never hard-coded in (or committed with) the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [None]:
import base64

from langchain.agents import create_agent
from langchain.messages import HumanMessage
from pydantic import BaseModel, Field

### 2) Read a sample driving license image and encode to base64

In [None]:
# Read the license image as raw bytes and base64-encode it; the encoded bytes
# are then converted to a UTF-8 string stored in img_b64.
with open("dl.jpg", mode="rb") as image_file:
    encoded_bytes = base64.b64encode(image_file.read())
img_b64 = str(encoded_bytes, "utf-8")

### 3) Define Class with properties holding driving license details

In [None]:
class DrivingLicenseInfo(BaseModel):
    """Details extracted from a driving license image."""

    # The descriptions become part of the schema pydantic generates for this
    # model, giving the LLM explicit guidance on what each field should hold.
    # All three fields stay required strings.
    driver_name: str = Field(
        description="Full name of the license holder as printed on the license",
    )
    driving_license_number: str = Field(
        description="Driving license number as printed on the license",
    )
    expiry_date: str = Field(
        description="Expiry date of the license as printed on the license",
    )

### 4) Create an agent that uses the LLM to parse the document details
#### - Pay attention to the `response_format` configuration, which enables structured output

In [None]:
# Instructions given to the model for every request handled by this agent.
system_prompt_text = (
    "You are a driving license image reader which reads information from driving license image."
)

# response_format is set to the DrivingLicenseInfo class so that the agent is
# configured for structured output using that schema.
agent = create_agent(
    model="gpt-5-nano",
    system_prompt=system_prompt_text,
    response_format=DrivingLicenseInfo,
)

### 5) Invoke the agent, passing the base64-encoded image as multimodal input

In [None]:
# The human message carries only the image; the reading instructions come from
# the agent's system prompt.
multimodal_question = HumanMessage(content=[
    # "image/jpeg" is the registered MIME type for JPEG data; "image/jpg" is
    # not a registered type and strict providers may reject it.
    {"type": "image", "base64": img_b64, "mime_type": "image/jpeg"}
])

response = agent.invoke(
    {"messages": [multimodal_question]}
)
# The parsed result is read from the "structured_response" key of the agent
# output (expected to be a DrivingLicenseInfo instance, per response_format).
dl = response["structured_response"]

### 6) Reading structured output

In [None]:
# Print each extracted value on its own line: name, license number, expiry date.
for field_name in ("driver_name", "driving_license_number", "expiry_date"):
    print(getattr(dl, field_name))