In [27]:
import os
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest

## Load configuration from environment variables

In [28]:
# Azure AI Document Intelligence connection settings (endpoint URL and API key).
endpoint, key = (
    os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"],
    os.environ["DOCUMENTINTELLIGENCE_API_KEY"],
)

# Azure OpenAI connection settings (endpoint URL and API key).
aoai_endpoint, subscription_key = (
    os.environ["ENDPOINT_URL"],
    os.environ["AZURE_OPENAI_API_KEY"],
)

# Name passed as `model=` when requesting chat completions.
deployment = "gpt-4o-2"

## Analyze the image file with the prebuilt Layout model

In [29]:
import os
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient

# 1. `endpoint` and `key` come from the environment variables read in the
#    configuration cell above. Do not hard-code credentials in the notebook.

# 2. Create the client
document_intelligence_client = DocumentIntelligenceClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(key),
)

# 3. Path to your local document
#    Can be .pdf, .jpg, .png, .tiff, etc.
local_path = "2th_sem.jpg"

# 4. Open and analyze the file with the prebuilt-layout model.
#    The raw bytes are uploaded as "application/octet-stream" so the upload is
#    not mislabelled when `local_path` points at something other than a JPEG
#    (step 3 allows several formats).
with open(local_path, "rb") as f:
    poller = document_intelligence_client.begin_analyze_document(
        "prebuilt-layout", f, content_type="application/octet-stream"
    )

# Block until the long-running analysis operation has finished.
result = poller.result()

# 5. Inspect the results

# 5a. Handwriting / print detection
# Styles are read from `result.styles` (document level), not from a page, so
# report one verdict for the document instead of one "Page ..." line per style
# entry. `result.styles` is optional in the SDK model, hence the `or []`.
has_handwriting = any(style.is_handwritten for style in (result.styles or []))
print(
    f"Document contains {'handwritten' if has_handwriting else 'no handwritten'} content"
)

# 5b. Lines and selection marks per page
# `lines` and `selection_marks` are optional on a page in the SDK model: a page
# without recognized text or marks can report None rather than an empty list.
# Falling back to [] keeps the loops from raising TypeError on such pages.
for page in result.pages:
    print(f"\n--- Page {page.page_number} ---")
    for line_idx, line in enumerate(page.lines or []):
        print(f"...Line {line_idx}: {line.content}")
    for sel in page.selection_marks or []:
        print(f"...Selection mark '{sel.state}' (confidence: {sel.confidence:.2f})")

# 5c. Tables
# `result.tables` is optional in the SDK model (None when no table was found),
# so fall back to an empty list before iterating.
for table_idx, table in enumerate(result.tables or []):
    print(f"\nTable #{table_idx} — {table.row_count} rows x {table.column_count} cols")
    if not table.row_count or not table.column_count:
        # Nothing to render, and grid[0] below would not exist.
        continue

    # Reconstruct the table grid; positions no cell reports stay empty.
    grid = [[""] * table.column_count for _ in range(table.row_count)]
    for cell in table.cells:
        # A cell may hold multi-line text or a literal pipe. Either would break
        # the one-row-per-line Markdown layout, so flatten newlines and escape
        # pipes before storing the text.
        cell_text = (cell.content or "").replace("\n", " ").replace("|", "\\|")
        grid[cell.row_index][cell.column_index] = cell_text

    # Print as a Markdown-style table, using the first grid row as the header.
    header = " | ".join(grid[0])
    separator = " | ".join(["---"] * table.column_count)
    print(f"| {header} |")
    print(f"| {separator} |")
    for row in grid[1:]:
        print(f"| {' | '.join(row)} |")


Page contains handwritten content
Page contains handwritten content
Page contains handwritten content

--- Page 1 ---
...Line 0: SI.No .: 0752151
...Line 1: ADIKAVI NANNAYA UNIVERSITY
...Line 2: OFFICIAL MEMORANDUM NO: (Supdt.) E-III /001/2018/Dated:25-07-2018
...Line 3: STATEMENT OF GRADES
...Line 4: B.Sc. CBCS SCHEME
...Line 5: DEGREE EXAMINATIONS AT THE END OF SECOND SEMESTER - MAR 2018
...Line 6: NAME OF THE CANDIDATE: TUMMALLAPALLI ANUSHA
...Line 7: THE FOLLOWING GRADES WERE SECURED BY THE CANDIDATE:
...Line 8: REGISTER NO: 174107102074
...Line 9: SUBJECTS
...Line 10: CREDITS
...Line 11: GRADE
...Line 12: POINTS
...Line 13: GRADE
...Line 14: POINTS
...Line 15: ENGLISH - II
...Line 16: 3
...Line 17: A+
...Line 18: 9
...Line 19: 27
...Line 20: SANSKRIT - II
...Line 21: 3
...Line 22: 0
...Line 23: 10
...Line 24: 30
...Line 25: FOUNDATION COURSES -INFORMATION AND COMMUNICATION
...Line 26: TECHNOLOGY-I
...Line 27: 2
...Line 28: B+
...Line 29: 7
...Line 30: 14
...Line 31: FOUNDATION COU

## Extract subjects and grades from the Layout model output

In [31]:
import os
import json
from openai import AzureOpenAI

# `aoai_endpoint`, `deployment` and `subscription_key` are defined once in the
# configuration cell near the top of the notebook. They are reused here instead
# of being read a second time, so the two cells cannot drift apart.

# Initialize Azure OpenAI client with key-based authentication
client = AzureOpenAI(
    azure_endpoint=aoai_endpoint,
    api_key=subscription_key,
    api_version="2025-01-01-preview",
)

# Prepare the chat prompt: the text recognized by the Layout model
# (`result.content`) is supplied as context, followed by the extraction
# instruction. The instruction mentions "JSON", as JSON mode requires.
chat_prompt = [
    {
        "role": "system",
        "content": [
            {
                "type": "text",
                "text": result.content
            }
        ]
    },
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Extract the values for Subjects, Credits, Grades, Points & Grade Points. Do not include any other information. The output should be a well formatted JSON object:\n\n"
            }
        ]
    }
]

# The prompt is sent unchanged; `messages` is kept as the name handed to the API.
messages = chat_prompt

# Generate the completion.
# temperature=0 keeps the extraction as repeatable as possible: this step
# copies values from the document, so sampling variety is unwanted.
completion = client.chat.completions.create(
    model=deployment,
    response_format={ "type": "json_object" },
    messages=messages,
    max_tokens=800,
    temperature=0,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None,
    stream=False
)

first_choice = completion.choices[0]
# A "length" finish reason means generation stopped at max_tokens, so the JSON
# printed below is cut off. Flag it rather than let it pass for a full answer.
if first_choice.finish_reason == "length":
    print("WARNING: the response reached max_tokens; the JSON below is incomplete.")

print(first_choice.message.content)
    


{
  "subjects": [
    {
      "name": "ENGLISH - II",
      "credits": 3,
      "grade": "A+",
      "points": 9,
      "grade_points": 27
    },
    {
      "name": "SANSKRIT - II",
      "credits": 3,
      "grade": "O",
      "points": 10,
      "grade_points": 30
    },
    {
      "name": "FOUNDATION COURSES - INFORMATION AND COMMUNICATION TECHNOLOGY-I",
      "credits": 2,
      "grade": "B+",
      "points": 7,
      "grade_points": 14
    },
    {
      "name": "FOUNDATION COURSES - COMMUNICATION AND SOFT SKILLS- 1",
      "credits": 2,
      "grade": "A",
      "points": 8,
      "grade_points": 16
    },
    {
      "name": "SOLID GEOMETRY",
      "credits": 5,
      "grade": "A",
      "points": 8,
      "grade_points": 40
    },
    {
      "name": "WAVES AND OSCILLATIONS",
      "credits": 3,
      "grade": "C",
      "points": 5,
      "grade_points": 15
    },
    {
      "name": "WAVES AND OSCILLATIONS PRACTICALS",
      "credits": 2,
      "grade": "O",
      "points"