In [1]:
# Google Cloud project and region; both are handed to the Vertex AI
# initialisation calls further down in this cell.
PROJECT_ID = "dwh-siloam"
REGION = "asia-southeast1"
print(f"Project ID: {PROJECT_ID}", f"Region: {REGION}", sep="\n")

# Initialize Vertex AI
from pathlib import Path
import vertexai
from google.cloud import aiplatform

# Choose credentials for Vertex AI: use a service-account key file from
# ./service_account when one exists; otherwise pass no explicit credentials
# and let the SDK resolve its defaults.
print("Checking Credentials...")

# Scan the folder once and sort the matches so the chosen key file is
# deterministic when several *.json files are present, and so the
# "is there a file?" check and the "which file?" lookup cannot disagree.
sa_files = sorted((Path.cwd() / "service_account").glob("*.json"))

# Arguments shared by both init calls; credentials are added only when found.
init_kwargs = {"project": PROJECT_ID, "location": REGION}

if not sa_files:
    print("Service account folder is empty. Fallback using default gcloud account")
else:
    print('Using service account credentials from service_account folder')
    from google.oauth2 import service_account
    sa_file = sa_files[0]
    print(f"Using service account file: {sa_file}")
    credentials = service_account.Credentials.from_service_account_file(sa_file)
    init_kwargs["credentials"] = credentials

aiplatform.init(**init_kwargs)
vertexai.init(**init_kwargs)

# Import libraries
from langchain_google_vertexai import VertexAI, ChatVertexAI, create_structured_runnable
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

from typing import List, Optional
import requests

from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import MessagesPlaceholder
from settings import CopilotSettings
import time

import io
from langchain_community.document_loaders import AzureAIDocumentIntelligenceLoader

Project ID: dwh-siloam
Region: asia-southeast1
Checking Credentials...
Using service account credentials from service_account folder
Using service account file: /Users/donnymirzaadhitama/workspace/others/chatbot-llm/service_account/dwh-siloam-99402e61edd2.json


In [2]:
# Load the project settings object; later cells read
# AZURE_DOCS_INT_ENDPOINT and AZURE_DOCS_INT_API_KEY from it.
config = CopilotSettings()

In [18]:
# PDF to analyse.
# NOTE(review): this is a machine-specific absolute path, and the file name
# appears to embed patient-identifying details — consider a configurable
# data directory and an anonymised sample before sharing the notebook.
file_path = "/Users/donnymirzaadhitama/workspace/bithealth/training_data/FPKS/2022082011446651_MRCCC_FPKS_VALE INDONESIA PT_OPA2208190466_KAMRULLAH ILYAS-1.pdf"

# Read the whole PDF into an in-memory stream. The context manager closes the
# file handle immediately instead of leaving it to the garbage collector.
with open(file_path, "rb") as pdf_file:
    in_file = io.BytesIO(pdf_file.read())

In [17]:
# Disabled alternative: load the same file through LangChain's
# AzureAIDocumentIntelligenceLoader instead of the Azure SDK client.
# endpoint = config.AZURE_DOCS_INT_ENDPOINT
# key = config.AZURE_DOCS_INT_API_KEY
# analysis_features = ["ocrHighResolution"]
# loader = AzureAIDocumentIntelligenceLoader(
#     api_endpoint=endpoint,
#     api_key=key,
#     file_path=file_path,
#     api_model="prebuilt-document",
#     # analysis_features=analysis_features,
# )

# documents = loader.load()

In [6]:
"""
This code sample shows Prebuilt Document operations with the Azure Form Recognizer client library. 
The async versions of the samples require Python 3.6 or later.

To learn more, please visit the documentation - Quickstart: Form Recognizer Python client library SDKs
https://learn.microsoft.com/azure/applied-ai-services/form-recognizer/quickstarts/get-started-v3-sdk-rest-api?view=doc-intel-3.1.0&pivots=programming-language-python
"""

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient

"""
Remember to remove the key from your code when you're done, and never post it publicly. For production, use
secure methods to store and access your credentials. For more information, see 
https://docs.microsoft.com/en-us/azure/cognitive-services/cognitive-services-security?tabs=command-line%2Ccsharp#environment-variables-and-application-configuration
"""
endpoint = config.AZURE_DOCS_INT_ENDPOINT
key = config.AZURE_DOCS_INT_API_KEY

# sample document
form_url = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"

document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    
# poller = document_analysis_client.begin_analyze_document_from_url(
#     model_id="prebuilt-document",
#     document_url=form_url
# )
poller = document_analysis_client.begin_analyze_document(
    model_id="prebuilt-document",
    document=in_file
)

result = poller.result()

print("----Key-value pairs found in document----")
for kv_pair in result.key_value_pairs:
    if kv_pair.key and kv_pair.value:
        print("Key '{}': Value: '{}'".format(kv_pair.key.content, kv_pair.value.content))
    else:
        print("Key '{}': Value:".format(kv_pair.key.content))

print("----------------------------------------")


----Key-value pairs found in document----
Key 'Sex:': Value: 'Male /'
Key 'Ph:': Value: '085399799077'
Key 'MR No:': Value: 'MRCCC.00-42-94-47'
Key 'DOB:': Value: '31-Dec-1971'
Key 'Kepada TS :': Value: 'dr.'
Key 'Internal': Value: ':selected:'
Key 'Eksternal': Value: ':unselected:'
Key 'Mohon konsultasi dan tindak lanjut untuk pasien dengan :': Value: 'Keluhan utama
Bah- Kin'
Key 'Hasil pemeriksaan yang ditemukan
:': Value: 'difama
Km .'
Key 'Diagnosa
:': Value: 'Saf Smilde'
Key 'Konsultasi satu kali': Value: ':unselected:'
Key 'Rawat bersama': Value: ':selected:'
Key 'Alih rawat': Value: ':unselected:'
Key 'Tanggal :': Value: '1120'
----------------------------------------


In [16]:
# Inspect the raw key and value elements of the first extracted pair.
# print(result.content)
first_pair = result.key_value_pairs[0]
print(first_pair.key, first_pair.value, sep="\n")

DocumentKeyValueElement(content=Sex:, bounding_regions=[BoundingRegion(page_number=1, polygon=[Point(x=4.5482, y=1.3704), Point(x=4.8071, y=1.3704), Point(x=4.802, y=1.543), Point(x=4.5431, y=1.543)])], spans=[DocumentSpan(offset=74, length=4)])
DocumentKeyValueElement(content=Male /, bounding_regions=[BoundingRegion(page_number=1, polygon=[Point(x=4.8375, y=1.3704), Point(x=5.2385, y=1.3738), Point(x=5.2371, y=1.5413), Point(x=4.8361, y=1.5379)])], spans=[DocumentSpan(offset=79, length=6)])
