In [None]:
!pip install azure.ai.documentintelligence 

In [None]:
# Import packages
import os
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv

load_dotenv(override=True)  # Load environment variables from .env file

# Use the connection string to connect to your Foundry project
project_connection_string = os.getenv("AIPROJECT_CONNECTION_STRING")
if not project_connection_string:
    # os.getenv returns None for an unset variable; stop here with an
    # actionable message instead of handing None to the client factory.
    raise ValueError(
        "AIPROJECT_CONNECTION_STRING is not set. Add it to your .env file or "
        "environment before running this notebook."
    )

try:
    project = AIProjectClient.from_connection_string(
        conn_str=project_connection_string, credential=DefaultAzureCredential()
    )
except Exception as e:
    # Surface a readable message in the cell output, then re-raise so that
    # later cells do not run against a missing `project`.
    print(f"Failed to connect to project: {e}")
    raise


In [None]:
from azure.ai.projects.models import ConnectionType

# Look up the project's default Azure AI Services connection.
# Credentials are requested too, because the client created further down
# reads `connection.key`.
connection = project.connections.get_default(
    include_credentials=True,  # Optional. Defaults to "False".
    connection_type=ConnectionType.AZURE_AI_SERVICES,
)

# Show the resolved connection so the endpoint can be checked by eye.
print(connection)

In [None]:
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult, DocumentContentFormat
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential

# Choose a credential that matches how the Foundry connection authenticates.
# When the connection exposes an API key, use it; otherwise fall back to
# DefaultAzureCredential (token-based auth) instead of failing on a missing key.
# NOTE(review): token-based auth presumably requires the identity to have access
# to the Azure AI Services resource - confirm role assignments.
connection_key = getattr(connection, "key", None)
if connection_key:
    document_intelligence_credential = AzureKeyCredential(connection_key)
else:
    document_intelligence_credential = DefaultAzureCredential()

document_intelligence_client = DocumentIntelligenceClient(
    endpoint=connection.endpoint_url,
    credential=document_intelligence_credential,
)



### Option 1: Convert the file to markdown

In [None]:
# Path of the PDF to analyze (relative to the notebook's working directory).
pdf_fpath = "data/0001062993-25-004143.pdf"

# Submit the PDF to the prebuilt layout model and ask for Markdown output.
# The file only has to stay open while the request is being submitted.
with open(pdf_fpath, "rb") as pdf_file:
    poller = document_intelligence_client.begin_analyze_document(
        model_id="prebuilt-layout",
        body=pdf_file,
        content_type="application/pdf",
        output_content_format=DocumentContentFormat.MARKDOWN,
    )

# Block until the long-running analysis completes.
result: AnalyzeResult = poller.result()

# The whole document rendered as a single Markdown string.
markdown = result.content

# Preview only the first 1000 characters to keep the cell output short.
print(markdown[:1000])

### Option 2: Extract detected key-value pairs

In [None]:

# Analyze the same PDF again, this time requesting plain-text content and the
# key-value-pair add-on feature. `pdf_fpath` is defined in the Option 1 cell.
with open(pdf_fpath, "rb") as f:
    poller = document_intelligence_client.begin_analyze_document(
        model_id="prebuilt-layout",
        body=f,
        # Canonical upper-case member name, consistent with MARKDOWN used in Option 1.
        output_content_format=DocumentContentFormat.TEXT,
        features=["keyValuePairs"],
        content_type="application/pdf",
    )

# Block until the long-running analysis completes.
result: AnalyzeResult = poller.result()

print("----Key-value pairs found in document----")
if result.key_value_pairs:
    for kv_pair in result.key_value_pairs:
        # A pair may lack a key or a value, so each side is checked on its own.
        if kv_pair.key:
            print(f"Key '{kv_pair.key.content}' found within '{kv_pair.key.bounding_regions}' bounding regions")
        if kv_pair.value:
            print(
                f"Value '{kv_pair.value.content}' found within "
                f"'{kv_pair.value.bounding_regions}' bounding regions\n"
            )
else:
    # Make the empty case explicit instead of printing only the header.
    print("No key-value pairs were detected.")