## Document Understanding
documentation:  https://docs.oracle.com/en-us/iaas/Content/document-understanding/using/home.htm
slack: #document-understanding

## Import Libraries

In [2]:
import oci
import io
import uuid
import sys
from oci.object_storage import ObjectStorageClient

## Set input variables

In [4]:
# Profile inside the OCI config file whose credentials are used
CONFIG_PROFILE = "AIWORKSHOP"

# Compartment and Object Storage coordinates used by the job
COMPARTMENT_ID = "ocid1.compartment.oc1..aaaaaaaaxj6fuodcmai6n6z5yyqif6a36ewfmmovn42red37ml3wxlehjmga"
NAMESPACE = "axaemuxiyife"
BUCKET_NAME = "workshopbucket"

# Prefix (folder) in the bucket under which the results are written; it is also
# used in the job display name. Change to your Oracle id.
PREFIX = "AAGARWA"

# Local path of the file to be uploaded for analysis
FILE_PATH = "reciept.png"
# Object name of the file in the bucket
FILE_NAME = "reciept.png"


## Load OCI config
Set up authentication for OCI by reading the configuration for the selected profile from a file; the SDK clients use it to sign API requests. The default configuration file location is `~/.oci/config`.

In [5]:
# Load the credentials of the selected profile from the OCI config file
config = oci.config.from_file(
    file_location='~/.oci/config',
    profile_name=CONFIG_PROFILE,
)

## OPTIONAL : upload file  

In [None]:
# Upload the local file to the bucket under the object name FILE_NAME
object_storage_client = ObjectStorageClient(config)
print(f"Uploading file {FILE_PATH} ...")
# Open the file in a context manager so the handle is closed as soon as the
# upload finishes (or fails) instead of being left open in the kernel
with open(FILE_PATH, 'rb') as file_to_upload:
    object_storage_client.put_object(NAMESPACE, BUCKET_NAME, FILE_NAME, file_to_upload)
print("Upload completed !")

## Create a callback function to print the status

In [6]:
def create_processor_job_callback(times_called, response):
    """Print the latest polled job data while waiting for the job to finish.

    Passed as ``wait_callback`` to the waiter that polls the processor job.

    Args:
        times_called: Number of times the callback has been invoked (unused).
        response: Response of the latest poll; its ``data`` attribute is printed.
    """
    message = "Waiting for processor lifecycle state to go into succeeded state:"
    print(message, response.data)
    

## Create AI Document service client

In [7]:
# Plain Document Understanding client, wrapped in the composite-operations helper
# that can create a job and wait for it to reach a given state
document_client = oci.ai_document.AIServiceDocumentClient(config=config)
dus_client = oci.ai_document.AIServiceDocumentClientCompositeOperations(document_client)

## Set up features to be analyzed
You can specify which features you want to run. The code below requests all of the features.

In [8]:
# Feature classes to request, in the order they are sent to the service
feature_types = (
    oci.ai_document.models.DocumentClassificationFeature,
    oci.ai_document.models.DocumentLanguageClassificationFeature,
    oci.ai_document.models.DocumentKeyValueExtractionFeature,
    oci.ai_document.models.DocumentTableExtractionFeature,
    oci.ai_document.models.DocumentTextExtractionFeature,
)
# One default-configured instance per feature
features = [feature_type() for feature_type in feature_types]

## Setup input location

In [9]:
# Object Storage location of the document to analyze
input_location = oci.ai_document.models.ObjectLocation(
    namespace_name=NAMESPACE,
    bucket_name=BUCKET_NAME,
    object_name=FILE_NAME,
)

## Setup output location

In [10]:
# Object Storage location (bucket + prefix) where the service writes the results
output_location = oci.ai_document.models.OutputLocation(
    namespace_name=NAMESPACE,
    bucket_name=BUCKET_NAME,
    prefix=PREFIX,
)

## Create processor job details

In [11]:
# A random UUID keeps the display name unique across runs
display_name = f"{PREFIX}_job_{uuid.uuid4()}"

# Wrap the single input object and the requested features in the request models
input_locations = oci.ai_document.models.ObjectStorageLocations(object_locations=[input_location])
processor_config = oci.ai_document.models.GeneralProcessorConfig(features=features)

job_details = oci.ai_document.models.CreateProcessorJobDetails(
    display_name=display_name,
    compartment_id=COMPARTMENT_ID,
    input_location=input_locations,
    output_location=output_location,
    processor_config=processor_config,
)

## Job in progress
The job progress is tracked till completion with an interval of 5 seconds.

In [12]:
# Submit the job and block until it reaches a terminal state.
# `dus_client` and `job_details` come from the cells above; the feature list is
# already embedded in job_details.processor_config, so nothing is rebuilt here.

# Wait on every terminal state, not only SUCCEEDED: otherwise a failed or
# canceled job keeps the waiter polling until it times out.
terminal_states = [
    oci.ai_document.models.ProcessorJob.LIFECYCLE_STATE_SUCCEEDED,
    oci.ai_document.models.ProcessorJob.LIFECYCLE_STATE_FAILED,
    oci.ai_document.models.ProcessorJob.LIFECYCLE_STATE_CANCELED,
]
processor = dus_client.create_processor_job_and_wait_for_state(
    create_processor_job_details=job_details,
    wait_for_states=terminal_states,
    waiter_kwargs={"wait_callback": create_processor_job_callback})

# Stop here with a clear error if the job did not succeed, so the following
# cell does not try to download results that were never written.
if processor.data.lifecycle_state != oci.ai_document.models.ProcessorJob.LIFECYCLE_STATE_SUCCEEDED:
    raise RuntimeError(
        f"Processor job {processor.data.id} ended in state "
        f"{processor.data.lifecycle_state}: {processor.data.lifecycle_details}")

## Get response json from object storage
The output can be found in the output location specified above. The cell below fetches the result JSON from Object Storage and prints it; you can also save the printed JSON to an `output.json` file.

In [None]:
# Report the status and request id of the completed processor call
print(f"processor call succeeded with status: {[processor.status]} and request_id: {processor.request_id}.")
processor_job: oci.ai_document.models.ProcessorJob = processor.data

print("Getting result json from the output_location")
# Result object path: <prefix>/<job id>/<namespace>_<bucket>/results/<input object>.json
result_object_name = f"{PREFIX}/{processor_job.id}/{NAMESPACE}_{BUCKET_NAME}/results/{FILE_NAME}.json"

# A fresh client is created here because the upload cell above is optional
object_storage_client = oci.object_storage.ObjectStorageClient(config=config)
get_object_response = object_storage_client.get_object(
    namespace_name=NAMESPACE,
    bucket_name=BUCKET_NAME,
    object_name=result_object_name,
)

# Decode the downloaded bytes and show the JSON text
result_json_text = get_object_response.data.content.decode()
print(result_json_text)

## Exercise 1 : Document Validator

1. Create a document in PowerPoint with
    * Name
    * Address
    * Dates 
    * Create date
    * Expiry date
    * Signature

1. Save it as an image 

1. Use the Document Understanding and LLM services to validate that the document
    *  is in the correct name
    *  has the correct address
    *  is not expired
    *  has a signature



## Exercise 2 : Form Filler

1. Upload a receipt 
1. Fill out an expense report based on the receipt 
    *  Imagine an expense report as a multi-line f-string for simplicity
