In [None]:
%pip install -r requirements.txt

In [None]:
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult, AnalyzeDocumentRequest
from azure.storage.blob.aio import BlobServiceClient, BlobClient, ContainerClient
from azure.identity.aio import DefaultAzureCredential
import os

In [None]:
load_dotenv(override=True)

doc_endpoint = os.getenv('DOC_ENDPOINT')
doc_key = os.getenv('DOC_API_KEY')

blob_account_url = os.getenv('BLOB_ACCOUNT_URL')
container_name = os.getenv('CONTAINER_NAME')

file_name = "contract_001_Fourth_Coffee_20250310.pdf"

In [29]:
from pydantic import BaseModel, Field
from typing import Optional, List

class Jurisdiction(BaseModel):
    region:Optional[str] = None
    clause:Optional[str] = None

# Extracted from Document Intelligence
class ContractFields(BaseModel):
    doc_type:Optional[str] = Field(default=None,alias="docType")
    title:Optional[str] = None
    effective_date:Optional[str] = Field(default=None,alias="effectiveDate")
    parties:List[str] = []
    jurisdictions:List[str] = []

class Message(BaseModel):
    message:str

class Contract(BaseModel):
    record_id:str = Field(default=None, alias="recordId")
    data:ContractFields
    errors:Optional[Message] = None
    warnings:Optional[Message] = None

class DocumentOutput(BaseModel):
    values: List[Contract]    

In [None]:
doc_client = DocumentIntelligenceClient(
    endpoint=doc_endpoint,
    credential=AzureKeyCredential(doc_key)
)

In [None]:
blob = BlobClient(
            account_url=blob_account_url,
            container_name=container_name,
            blob_name=file_name,
            credential=DefaultAzureCredential()
       )

In [None]:
print(blob.url)

In [None]:
poller = await doc_client.begin_analyze_document(
    model_id="prebuilt-contract",
    body=AnalyzeDocumentRequest(url_source=blob.url)
    #query_fields=[]  # Todo
)

In [None]:
contract = await poller.result()

In [None]:
print(contract)

In [40]:
contract_fields:ContractFields = ContractFields()

doc = contract.documents[0]

if doc.doc_type:
    contract_fields.doc_type = doc.doc_type

title = doc.fields.get("Title")
if title:
    contract_fields.title = title.value_string

effective_date = doc.fields.get("EffectiveDate")
if effective_date:
    contract_fields.effective_date = effective_date.value_string

parties = doc.fields.get("Parties")
if parties:
    contract_parties:List[str] = []
    for party_idx, party in enumerate(parties.value_array):
        contract_parties.append(party.value_string)
    contract_fields.parties = contract_parties

jurisdictions = doc.fields.get("Jurisdictions")
if jurisdictions:
    contract_jurisdictions:List[Jurisdiction] = []
    for jurisdiction_idx, jurisdiction in enumerate(jurisdictions.value_array):
        
        contract_jurisdiction = Jurisdiction()
        
        region = jurisdiction.value_object.get("Region")
        if region:
            contract_jurisdiction.region = region.value_string

        clause = jurisdiction.value_object.get("Clause")
        if clause:
            contract_jurisdiction.clause = clause.value_string
        
        contract_jurisdictions.append(contract_jurisdiction)
    
    contract_fields.jurisdictions = contract_jurisdictions

In [None]:
print(contract_fields)

In [33]:
document_output = DocumentOutput(values=[])
document_output.values.append(contract_fields)

In [34]:
document_output.model_dump_json(indent=4)

'{\n    "values": [\n        {}\n    ]\n}'