In [None]:
#%pip install azure-ai-documentintelligence azure-cognitiveservices-speech openai python-dotenv


In [2]:
# import libraries
import os
import re
import requests
import sys
import json
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
from openai import AzureOpenAI
import azure.cognitiveservices.speech as speechsdk
from dotenv import load_dotenv

# Read configuration and set up the SDK clients.
# load_dotenv() is called first so the os.getenv() lookups below can also see
# values defined in a local .env file.
load_dotenv()
openai_model = os.getenv("AZURE_OPENAI_MODEL")
input_path = "./input"
output_path = "./output"

document_intelligence_key = os.getenv("DOCUMENT_INTELLIGENCE_KEY")
azure_openai_key = os.getenv("AZURE_OPENAI_KEY")
azure_speech_key = os.getenv("AZURE_SPEECH_KEY")

document_intelligence_endpoint = os.getenv("DOCUMENT_INTELLIGENCE_ENDPOINT")
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_speech_region = os.getenv("AZURE_SPEECH_REGION")

# Fail fast, and name every missing setting, instead of letting a client
# constructor or the first service call fail later with a less obvious error.
# AZURE_OPENAI_VERSION is intentionally not listed: it is passed through as-is,
# exactly as before.
required_settings = {
    "DOCUMENT_INTELLIGENCE_KEY": document_intelligence_key,
    "DOCUMENT_INTELLIGENCE_ENDPOINT": document_intelligence_endpoint,
    "AZURE_OPENAI_KEY": azure_openai_key,
    "AZURE_OPENAI_ENDPOINT": azure_openai_endpoint,
    "AZURE_OPENAI_MODEL": openai_model,
    "AZURE_SPEECH_KEY": azure_speech_key,
    "AZURE_SPEECH_REGION": azure_speech_region,
}
missing_settings = [name for name, value in required_settings.items() if not value]
if missing_settings:
    raise ValueError(
        "One or more Azure settings are not set in the environment variables: "
        + ", ".join(missing_settings)
    )

document_intelligence_client = DocumentIntelligenceClient(
    endpoint = document_intelligence_endpoint,
    credential = AzureKeyCredential(document_intelligence_key)
)

aoai_client = AzureOpenAI(
  azure_endpoint = azure_openai_endpoint,
  api_key = azure_openai_key,
  api_version = os.getenv("AZURE_OPENAI_VERSION")
)

speech_config = speechsdk.SpeechConfig(
  subscription=azure_speech_key,
  region=azure_speech_region,
)
# Voice used for every synthesis request made with this config.
speech_config.speech_synthesis_voice_name='en-US-AvaMultilingualNeural'


In [3]:
# Define your prompts

# System prompt: fixes the assistant's role and the formatting rules for the
# extracted text, which is later passed to text-to-speech.
system_prompt_template = """You are an AI assistant that is helping to extract text from a document and then turn that text back into speech.
The document has been processed with the document intelligence service and you will be provided with the JSON output from that analysis.
- Find and extract text exactly as supplied without altering.
- Do not be creative.
- Your only role is to extract the desired information.
- Do not extract more than is requested.
- Use minimal markdown formatting so each key and its corresponding value can be spoken correctly.
- Don't bold anything.
- If you see a single 'o' character at the beginning of a line, like a bullet mark, replace it with a single dash, '-', so it is compatible with markdown."""

# User prompt: `{content}` is filled in with the cleaned analysis output via
# str.format(). The literal opens with exactly three quotes; a fourth quote
# would become a stray '"' at the start of the prompt text.
user_prompt_template = """Please extract the Bill Number, Sponsor, and Bill Summary from the following json.  
Make sure the Bill Summary includes all content in the Bill Summary section, up to but not including the Current Law section.
Document JSON:
{content}"""


In [6]:
# Define a function to extract content from a local document
def extract_content(file_path):
    """Analyze a local file with the "prebuilt-layout" Document Intelligence model.

    Args:
        file_path: Path of the document to analyze. The file is opened in
            binary mode and submitted with the "application/pdf" content type.

    Returns:
        Whatever the analysis poller's ``result()`` call returns.
    """
    # The analysis is started while the file handle is open; the poller is
    # only waited on after the handle has been closed.
    with open(file_path, 'rb') as document_stream:
        analysis_poller = document_intelligence_client.begin_analyze_document(
            "prebuilt-layout",
            body=document_stream,
            content_type="application/pdf",
        )

    return analysis_poller.result()

# Define function to call Azure OpenAI
def extract_fields_with_openai(system_prompt, user_prompt):
    """Send a system/user prompt pair to Azure OpenAI and return the reply text.

    Args:
        system_prompt: Text sent as the "system" message.
        user_prompt: Text sent as the "user" message.

    Returns:
        The first choice's message content with surrounding whitespace stripped.

    Raises:
        ValueError: If the response contains no choices, or the first choice
            carries no text content.
    """
    response = aoai_client.chat.completions.create(
        model = os.getenv("AZURE_OPENAI_MODEL"),
        messages = [{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}],
    )

    if not response.choices:
        raise ValueError("Azure OpenAI returned no choices for the request.")

    first_choice = response.choices[0]
    content = first_choice.message.content
    if content is None:
        # Calling .strip() on None would raise an opaque AttributeError;
        # report the finish reason instead so the cause is visible.
        finish_reason = getattr(first_choice, "finish_reason", None)
        raise ValueError(
            f"Azure OpenAI returned no text content (finish_reason={finish_reason!r})."
        )

    return content.strip()

# Define a function to convert text to speech
def text_to_speech(text, output_file):
    """Synthesize ``text`` to an audio file and report the outcome.

    Args:
        text: Text to speak.
        output_file: Path of the audio file the synthesizer writes to.

    Returns:
        None. The outcome is printed: a success message only when the service
        reports that synthesis completed, otherwise the cancellation or
        failure details.
    """
    # Route the synthesizer's audio output to the target file.
    audio_config = speechsdk.audio.AudioOutputConfig(filename=output_file)
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()

    reason = speech_synthesis_result.reason
    if reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print(f"Audio content written to file {output_file}")
        return

    # Anything else means the audio was not produced; never claim success.
    if reason == speechsdk.ResultReason.Canceled:
        cancellation_details = speech_synthesis_result.cancellation_details
        print(f"Speech synthesis canceled for {output_file}: {cancellation_details.reason}")
        if (
            cancellation_details.reason == speechsdk.CancellationReason.Error
            and cancellation_details.error_details
        ):
            print(f"Error details: {cancellation_details.error_details}")
    else:
        print(f"Speech synthesis did not complete for {output_file}: {reason}")


def clean_analyze_result(analyze_result):
    """Return the analysis result as a dict with layout-only keys removed.

    Args:
        analyze_result: Object exposing ``as_dict()``; the dict it returns is
            pruned in place and then returned.

    Returns:
        The dict from ``as_dict()`` with every 'boundingRegions', 'words' and
        'polygon' entry removed at every nesting level.
    """
    unwanted_keys = ['boundingRegions', 'words', 'polygon']

    def prune(node, names):
        # Walk nested lists and dicts; any other value is left untouched.
        if isinstance(node, list):
            for child in node:
                prune(child, names)
        elif isinstance(node, dict):
            # Drop the unwanted keys first so their subtrees are never visited.
            for name in names:
                node.pop(name, None)
            for child in node.values():
                prune(child, names)

    result_dict = analyze_result.as_dict()
    prune(result_dict, unwanted_keys)
    return result_dict

# Define a function to save extracted data to a file
def save_data(data, output_file):
    """Write a text string to ``output_file``, replacing any existing content.

    Args:
        data: The text to write.
        output_file: Destination file path.
    """
    # Use an explicit UTF-8 encoding: the platform default may be unable to
    # represent non-ASCII characters found in extracted document text.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(data)
        


In [None]:
# Process all documents in the input directory

# Clear the output directory, creating it on a first run. Only regular files
# are deleted, because os.remove() cannot delete directories.
os.makedirs(output_path, exist_ok=True)
for file in os.listdir(output_path):
    file_path = os.path.join(output_path, file)
    if os.path.isfile(file_path):
        os.remove(file_path)

# Sorted so documents are processed in a stable order on every run.
for filename in sorted(os.listdir(input_path)):
    file_path = os.path.join(input_path, filename)
    if not os.path.isfile(file_path):
        # Skip sub-directories; extract_content() needs a file it can open.
        continue

    output_filename = os.path.join(output_path, f"{os.path.splitext(filename)[0]}")

    raw_json_path = output_filename + ".raw.json"
    clean_json_path = output_filename + ".clean.json"
    txt_path = output_filename + ".txt"
    wav_path = output_filename + ".wav"

    # Get the content of the document and keep the full analysis as JSON.
    docContent = extract_content(file_path)
    save_data(json.dumps(docContent.as_dict()), raw_json_path)

    # Clean the JSON content to reduce its (token) size
    cleanDocContent = clean_analyze_result(docContent)
    clean_json = json.dumps(cleanDocContent)
    save_data(clean_json, clean_json_path)

    # Extract the content from the JSON using genai. The serialized JSON text
    # is embedded in the prompt, not the dict itself: formatting a dict would
    # insert its Python repr (single quotes, True/None), which is not JSON.
    prompt = user_prompt_template.format(content=clean_json)
    extracted_data = extract_fields_with_openai(system_prompt_template, prompt)
    save_data(extracted_data, txt_path)

    # Convert the extracted data to speech
    text_to_speech(extracted_data, wav_path)


In [107]:
# # Process all documents in the input directory

# # The lines that empty the output folder stay commented out so the saved *.clean.json files can be reused
# # for file in os.listdir(output_path):
# #     file_path = os.path.join(output_path, file)
# #     os.remove(file_path)

# for filename in os.listdir(input_path):
#     file_path = os.path.join(input_path, filename)
#     output_filename = os.path.join(output_path, f"{os.path.splitext(filename)[0]}")

#     raw_json_path = output_filename + ".raw.json"
#     clean_json_path = output_filename + ".clean.json"
#     txt_path = output_filename + ".txt"
#     wav_path = output_filename + ".wav"


#     with open(clean_json_path, 'r') as f:
#         cleanDocContent = json.load(f)

#     # Extract the content from the json file using genai
#     prompt = user_prompt_template.format(content=cleanDocContent)
#     extracted_data = extract_fields_with_openai(system_prompt_template, prompt)
#     save_data(extracted_data, txt_path)
