In [None]:
%pip install -r requirements.txt

In [49]:
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult, AnalyzeDocumentRequest, DocumentAnalysisFeature
from azure.storage.blob.aio import BlobServiceClient, BlobClient, ContainerClient
from azure.identity.aio import DefaultAzureCredential
import os

In [None]:
from pydantic import BaseModel, Field
from typing import Optional, List

class Jurisdiction(BaseModel):
    region: Optional[str] = Field(default=None, alias="Region")
    clause: Optional[str] = Field(default=None, alias="Clause")

class Party(BaseModel):
    name: Optional[str] = Field(default=None, alias="Name")
    address: Optional[str] = Field(default=None, alias="Address")
    reference_name: Optional[str] = Field(default=None, alias="ReferenceName")
    clause: Optional[str] = Field(default=None, alias="Clause")

# Extracted from Document Intelligence
class ContractFields(BaseModel):
    doc_type: Optional[str] = Field(default=None, alias="docType")
    title: Optional[str] = Field(default=None, alias="Title")
    contract_id: Optional[str] = Field(default=None, alias="ContractId")
    parties: List[Party] = []
    execution_date: Optional[str] = Field(default=None, alias="ExecutionDate")
    effective_date: Optional[str] = Field(default=None, alias="EffectiveDate")
    expiration_date: Optional[str] = Field(default=None, alias="ExpirationDate")
    contract_duration: Optional[str] = Field(default=None, alias="ContractDuration")
    renewal_date: Optional[str] = Field(default=None, alias="RenewalDate")
    jurisdictions: List[Jurisdiction] = []

class Message(BaseModel):
    message: str

class Contract(BaseModel):
    record_id: str = Field(default=None, alias="recordId")
    data: ContractFields
    errors: Optional[Message] = None
    warnings: Optional[Message] = None

class DocumentOutput(BaseModel):
    values: List[Contract]


In [52]:
from pydantic import BaseModel, Field
from typing import Optional, List

class Jurisdiction(BaseModel):
    region:Optional[str] = None
    clause:Optional[str] = None

class Party(BaseModel):
    name:Optional[str] = None
    address:Optional[str] = None
    referenceName:Optional[str] = None
    clause:Optional[str] = None

# Extracted from Document Intelligence
class ContractFields(BaseModel):
    doc_type:Optional[str] = Field(default=None,alias="docType")
    contract_type:Optional[str] = Field(default=None, alias="contractType")
    title:Optional[str] = None
    effective_date:Optional[str] = Field(default=None,alias="effectiveDate")
    expiration_date:Optional[str] = Field(default=None,alias="expirationDate")
    parties:List[str] = []
    jurisdictions:List[str] = []

class Message(BaseModel):
    message:str

class Contract(BaseModel):
    record_id:str = Field(default=None, alias="recordId")
    data:ContractFields
    errors:Optional[Message] = None
    warnings:Optional[Message] = None

class DocumentOutput(BaseModel):
    values: List[Contract]    

In [46]:
doc_client = DocumentIntelligenceClient(
    endpoint=doc_endpoint,
    credential=AzureKeyCredential(doc_key)
)

In [47]:
blob = BlobClient(
            account_url=blob_account_url,
            container_name=container_name,
            blob_name=file_name,
            credential=DefaultAzureCredential()
       )

In [None]:
print(blob.url)

In [None]:
poller = await doc_client.begin_analyze_document(
    model_id="prebuilt-contract",
    body=AnalyzeDocumentRequest(url_source=blob.url),
    features=[DocumentAnalysisFeature.QUERY_FIELDS],
    query_fields=["ContractId", "ContractDuration", "RenewalDate"]
)


In [54]:
contract = await poller.result()

In [55]:
print(contract)

{'apiVersion': '2024-11-30', 'modelId': 'prebuilt-contract', 'stringIndexType': 'textElements', 'content': 'WEB HOSTING AGREEMENT\nThis Web Hosting Agreement is entered as of the 10 day of March, 2025 ("Effective Date") by and between Fourth Coffee, a corporation, having its principal place of business at 789 Business Ave, Tacoma, Washington 98402 ("Fourth") and CloudHost Systems, a corporation having its principal place of business at 654 Cloud Lane, Seattle, Washington 98103 ("CloudHost").\nThis agreement shall void and nullify any and all previous agreements to this date between Fourth and CloudHost.\nThere shall be no additional fees of any kind paid to Fourth, other than those listed within this agreement for web hosting services and/or bandwidth usage. The initial term of this contract is for 36 months with a maximum of 345,526 accesses thereafter payment shall be $0.019 (one-half cent) per access. CloudHost must monitor and remit this amount to Fourth by no later than Wednesday 

In [None]:
contract_fields: ContractFields = ContractFields()

doc = contract.documents[0]

# Extract doc_type
if doc.doc_type:
    contract_fields.doc_type = doc.doc_type

# Extract Title
title = doc.fields.get("Title")
if title:
    contract_fields.title = title.value_string

# Extract ContractId
contract_id = doc.fields.get("ContractId")
if contract_id:
    contract_fields.contract_id = contract_id.value_string

# Extract Parties
parties = doc.fields.get("Parties")
if parties and parties.value_array:
    contract_parties: List[Party] = []
    for party in parties.value_array:
        party_obj = Party()
        
        name = party.value_object.get("Name")
        if name:
            party_obj.name = name.value_string
        
        address = party.value_object.get("Address")
        if address:
            party_obj.address = address.value_address
        
        reference_name = party.value_object.get("ReferenceName")
        if reference_name:
            party_obj.reference_name = reference_name.value_string
        
        clause = party.value_object.get("Clause")
        if clause:
            party_obj.clause = clause.value_string
        
        contract_parties.append(party_obj)
    contract_fields.parties = contract_parties

# Extract ExecutionDate
execution_date = doc.fields.get("ExecutionDate")
if execution_date:
    contract_fields.execution_date = execution_date.value_date.isoformat() if execution_date.value_date else execution_date.content

# Extract EffectiveDate
effective_date = doc.fields.get("EffectiveDate")
if effective_date:
    contract_fields.effective_date = effective_date.value_date.isoformat() if effective_date.value_date else effective_date.content

# Extract ExpirationDate
expiration_date = doc.fields.get("ExpirationDate")
if expiration_date:
    contract_fields.expiration_date = expiration_date.value_date.isoformat() if expiration_date.value_date else expiration_date.content

# Extract ContractDuration
contract_duration = doc.fields.get("ContractDuration")
if contract_duration:
    contract_fields.contract_duration = contract_duration.value_string

# Extract RenewalDate
renewal_date = doc.fields.get("RenewalDate")
if renewal_date:
    contract_fields.renewal_date = renewal_date.value_date.isoformat() if renewal_date.value_date else renewal_date.content

# Extract Jurisdictions
jurisdictions = doc.fields.get("Jurisdictions")
if jurisdictions and jurisdictions.value_array:
    contract_jurisdictions: List[Jurisdiction] = []
    for jurisdiction in jurisdictions.value_array:
        contract_jurisdiction = Jurisdiction()
        
        region = jurisdiction.value_object.get("Region")
        if region:
            contract_jurisdiction.region = region.value_string

        clause = jurisdiction.value_object.get("Clause")
        if clause:
            contract_jurisdiction.clause = clause.value_string
        
        contract_jurisdictions.append(contract_jurisdiction)
    contract_fields.jurisdictions = contract_jurisdictions


In [57]:
print(contract_fields)

doc_type='contract' contract_type=None title='WEB HOSTING AGREEMENT' effective_date=None expiration_date=None parties=[None, None] jurisdictions=[Jurisdiction(region='Washington.', clause='This Agreement shall be governed by and construed in accordance with the internal laws of the state of Washington.')]


In [33]:
document_output = DocumentOutput(values=[])
document_output.values.append(contract_fields)

In [34]:
document_output.model_dump_json(indent=4)

'{\n    "values": [\n        {}\n    ]\n}'