# Refactored SampleProject.py

This notebook is a refactored version of the `SampleProject.py` file. It organizes the code into sections, removes redundancy, and provides clear descriptions for each function.

## Import Required Libraries

This section imports all the necessary libraries and modules used throughout the project.

In [None]:
import openai
import os
import json
import httpx
import csv
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from tenacity import (
    retry, 
    stop_after_attempt,
    wait_random_exponential
)
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain_core.tools import tool
from langchain.agents import AgentExecutor
from langchain_openai import AzureOpenAI
from typing import TypedDict, List, Union
from datetime import datetime

## Load Environment Variables

This section loads the environment variables required for the project from a dotenv file: the model endpoint, model name, project ID, and API version.

In [None]:
# Populate the process environment from the project's dotenv file, then read
# the model connection settings in one pass. Each value is None when the
# corresponding variable is not set.
load_dotenv('./Data/UIAS_vars.env')
endpoint, model_name, project_id, api_version = (
    os.environ.get(variable)
    for variable in ("MODEL_ENDPOINT", "MODEL_NAME", "PROJECT_ID", "API_VERSION")
)

## Define Utility Functions

This section defines utility functions used throughout the project, such as `get_access_token` and `calculate_age`.

In [None]:
def get_access_token():
    """
    Request an OAuth2 access token using the client-credentials grant.

    The client ID and secret are read from the ``AIML_Training`` secret scope
    through ``dbutils`` and posted as form data to the token endpoint.

    NOTE(review): ``dbutils`` is not defined in this file; presumably it is
    injected by the Databricks runtime -- confirm before running elsewhere.

    Returns:
        str: The ``access_token`` field of the JSON response (``None`` when
        the response body has no such field).
    """
    token_url = "https://api.uhg.com/oauth2/token"
    form_headers = {"Content-Type": "application/x-www-form-urlencoded"}

    with httpx.Client() as http:
        form_fields = {
            "client_id": dbutils.secrets.get(scope="AIML_Training", key="client_id"),
            "client_secret": dbutils.secrets.get(scope="AIML_Training", key="client_secret"),
            "scope": "https://api.uhg.com/.default",
            "grant_type": "client_credentials",
        }
        token_response = http.post(token_url, data=form_fields, headers=form_headers)
        return token_response.json().get("access_token")

def calculate_age(date_of_birth: str, date_of_service: str) -> int:
    """
    Return the patient's age in whole years on the date of service.

    Args:
        date_of_birth (str): The patient's date of birth in YYYY-MM-DD format.
        date_of_service (str): The date of service in YYYY-MM-DD format.

    Returns:
        int: Completed years between the two dates.

    Raises:
        ValueError: If either string does not match YYYY-MM-DD.
    """
    date_format = "%Y-%m-%d"
    born = datetime.strptime(date_of_birth, date_format)
    served = datetime.strptime(date_of_service, date_format)

    # Comparing (month, day) tuples tells us whether the birthday has not yet
    # come around in the service year; if so, the last year is incomplete.
    birthday_pending = (served.month, served.day) < (born.month, born.day)
    return served.year - born.year - int(birthday_pending)

## Define Core Functions

This section defines the `query_llm` helper and the core functions built on it: summarizing patient records, summarizing policy guidelines, and checking claim coverage.

In [None]:
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def query_llm(prompt_messages, max_tokens=4096, temperature=1.0, top_p=1.0):
    """
    Query the language model with the given prompt messages.

    The call is wrapped in a tenacity retry: any exception triggers another
    attempt with randomized exponential backoff, up to 6 attempts in total.

    NOTE(review): ``chat_client`` is not defined in the visible source; it
    must exist at module level before this function is called.

    Args:
        prompt_messages (list): A list of role/content message dicts to send to the LLM.
        max_tokens (int): Maximum number of tokens to generate.
        temperature (float): Sampling temperature.
        top_p (float): Nucleus sampling probability.

    Returns:
        str: The content of the first choice in the LLM's response.
    """
    response = chat_client.chat.completions.create(
        model=model_name,
        messages=prompt_messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p
    )
    # The message is an object, not a dict: subscripting it raises TypeError,
    # which the retry decorator would then repeat until it gives up.
    return response.choices[0].message.content

In [None]:
def summarize_patient_record(record_str: str):
    """
    Produce an LLM-written, sectioned summary of a patient record.

    The record is parsed, a ``patient_demographics`` entry holding only name,
    gender, and age is added, and the enriched record is sent to the model
    together with the ICD-10/CPT reference code mappings so that codes can be
    described in plain language.

    Args:
        record_str (str): The patient record as a JSON string.

    Returns:
        str: The model's summary, or a message starting with
        "Error processing the patient record:" if the record could not be
        parsed or enriched.
    """
    codes_blob = json.dumps(reference_codes_data)

    try:
        parsed = json.loads(record_str)
        patient_name = parsed.get("name", "Unknown")
        patient_gender = parsed.get("gender", "Unknown")
        born_on = parsed.get("date_of_birth")
        seen_on = parsed.get("date_of_service")

        # Age can only be derived when both dates are present.
        if born_on and seen_on:
            patient_age = calculate_age(born_on, seen_on)
        else:
            patient_age = "Unknown"

        parsed["patient_demographics"] = {
            "name": patient_name,
            "gender": patient_gender,
            "age": patient_age,
        }
        record_str = json.dumps(parsed)
    except Exception as exc:
        return f"Error processing the patient record: {exc!s}"

    instructions = (
        "You are a helpful assistant that summarizes patient records. "
        "Use the provided ICD-10 and CPT code mappings to include human-readable descriptions "
        "of medical codes. The summary should include the following sections:\n"
        "- Patient Demographics (only name, gender, and age)\n"
        "- Insurance Policy\n"
        "- Diagnoses (with descriptions)\n"
        "- Procedures (with descriptions)\n"
        "- Preauthorization Status\n"
        "- Billed Amount (in USD)\n"
        "- Date of Service\n\n"
        "The output must be in a bullet-point format or clearly labeled sections.\n\n"
        "Here are the ICD-10 and CPT code mappings for your reference:\n"
        f"{codes_blob}"
    )
    request = f"Summarize the following patient record:\n\n{record_str}\n\nSummary:"

    return query_llm(
        [
            {"role": "developer", "content": instructions},
            {"role": "user", "content": request},
        ]
    )

In [None]:
def summarize_policy_guidelines(policy_str: str):
    """
    Produce an LLM-written, sectioned summary of insurance policy guidelines.

    The policy text is sent to the model together with the ICD-10/CPT
    reference code mappings so that diagnosis and procedure codes can be
    described in plain language.

    Args:
        policy_str (str): The policy guidelines as a JSON string.

    Returns:
        str: The model's summary of the policy guidelines.
    """
    codes_blob = json.dumps(reference_codes_data)

    instructions = (
        "You are a helpful assistant that summarizes insurance policy guidelines. "
        "Use the provided ICD-10 and CPT code mappings to include human-readable descriptions "
        "of medical and procedure codes. The summary should include the following sections:\n"
        "- Policy Details: Include policy ID and plan name.\n"
        "- Covered Procedures: For each covered procedure, include:\n"
        "  - Procedure Code and Description (using CPT code mappings)\n"
        "  - Covered Diagnoses and Descriptions (using ICD-10 code mappings)\n"
        "  - Gender Restriction\n"
        "  - Age Range\n"
        "  - Preauthorization Requirement\n"
        "  - Notes on Coverage (if any)\n"
        "Each procedure should be presented as a separate entry under the 'Covered Procedures' section, "
        "with the required sub-points clearly listed.\n\n"
        "The output must be in a bullet-point format or clearly labeled sections.\n\n"
        "Here are the ICD-10 and CPT code mappings for your reference:\n"
        f"{codes_blob}"
    )
    request = f"Summarize the following insurance policy guidelines:\n\n{policy_str}\n\nSummary:"

    return query_llm(
        [
            {"role": "developer", "content": instructions},
            {"role": "user", "content": request},
        ]
    )

In [None]:
def check_claim_coverage(record_summary, policy_summary):
    """
    Ask the LLM to decide whether a claim is covered.

    The model is instructed to check diagnosis, procedure, age, gender, and
    preauthorization against the policy, then to answer with a coverage
    review and a final decision of APPROVE or ROUTE FOR REVIEW.

    Args:
        record_summary (str): The summarized patient record.
        policy_summary (str): The summarized policy guidelines.

    Returns:
        str: The model's reply. JSON is requested in the prompt, but the
        reply is returned as-is without being parsed or validated.
    """
    reviewer_instructions = (
        "You are an insurance claim processing assistant that evaluates claim coverage. "
        "Follow these steps to evaluate the claim:\n"
        "1. Check if the patient's diagnosis code(s) match the policy-covered diagnoses for the claimed procedure.\n"
        "2. Verify that the procedure code is explicitly listed in the policy and all associated conditions are satisfied.\n"
        "3. Ensure the patient's age falls within the policy's defined age range (inclusive of the lower bound, exclusive of the upper bound).\n"
        "4. Confirm that the patient's gender matches the policy's requirement for the procedure.\n"
        "5. If preauthorization is required by the policy, ensure it has been obtained.\n"
        "Only procedures and diagnoses explicitly listed in the patient record should be evaluated. "
        "For simplicity, assume there is only one procedure per patient.\n\n"
        "The output should include the following sections:\n"
        "- Coverage Review: Step-by-step analysis for the claimed procedure, detailing the checks performed.\n"
        "- Final Decision: For the procedure, return either 'APPROVE' or 'ROUTE FOR REVIEW' with a brief explanation of the reasoning behind it.\n"
    )

    # Doubled braces are literal braces in the JSON template shown to the model.
    claim_request = f"""Evaluate the following claim:

Patient Record Summary:
{record_summary}

Insurance Policy Summary:
{policy_summary}

Please respond in JSON format with the following structure:
{{
    "coverage_review": "Step-by-step analysis of the checks performed.",
    "summary_of_findings":"summary of which coverage requirements were met or not met",
    "final_decision": {{
        "decision": "APPROVE or ROUTE FOR REVIEW",
        "reasoning": "Brief explanation of the decision."
    }}
}}
"""

    return query_llm(
        [
            {"role": "developer", "content": reviewer_instructions},
            {"role": "user", "content": claim_request},
        ]
    )

## Load Data Files

This section loads the required JSON data files: the reference codes, the evaluation dataset, the test dataset, and the insurance policies.

In [None]:
# Locations of the JSON inputs, relative to the current working directory.
reference_codes_path = "./data/reference_codes.json"
evaluation_dataset_path = "./data/evaluation_dataset.json"
test_dataset_path = "./data/test_dataset.json"
insurance_policies_path = "./data/insurance_policies.json"

# The encoding is passed explicitly so that loading does not depend on the
# platform's locale default (which is not UTF-8 on every system).
with open(reference_codes_path, 'r', encoding="utf-8") as file:
    reference_codes_data = json.load(file)

with open(evaluation_dataset_path, 'r', encoding="utf-8") as file:
    evaluation_dataset_data = json.load(file)

with open(test_dataset_path, 'r', encoding="utf-8") as file:
    test_dataset_data = json.load(file)

with open(insurance_policies_path, 'r', encoding="utf-8") as file:
    insurance_policies_data = json.load(file)

## Tool Creation

This section defines the tools used by the agent, such as summarizing patient records, summarizing policy guidelines, and checking claim coverage.

In [None]:
@tool
def summarize_patient_record_tool(record_str: str) -> str:
    """Summarize a patient record.

    Takes the patient record as a JSON string and returns a structured summary
    covering demographics (name, gender, age), insurance policy, diagnoses,
    procedures, preauthorization status, billed amount, and date of service.
    """
    # The docstring above doubles as the tool description given to the agent;
    # @tool needs a docstring (or explicit description) to build the tool.
    return summarize_patient_record(record_str)

@tool
def summarize_policy_guideline_tool(policy_str: str) -> str:
    """Summarize insurance policy guidelines.

    Takes the policy guidelines as a JSON string and returns a structured
    summary of the policy details and each covered procedure, including its
    covered diagnoses, gender restriction, age range, and preauthorization
    requirement.
    """
    # The docstring above doubles as the tool description given to the agent;
    # @tool needs a docstring (or explicit description) to build the tool.
    return summarize_policy_guidelines(policy_str)

@tool
def check_claim_coverage_tool(record_summary: str, policy_summary: str) -> str:
    """Evaluate whether a claim is covered.

    Takes a patient record summary and an insurance policy summary and returns
    the coverage review with a final decision of APPROVE or ROUTE FOR REVIEW.
    """
    # The docstring above doubles as the tool description given to the agent;
    # @tool needs a docstring (or explicit description) to build the tool.
    # The return annotation is str because the wrapped function returns the
    # model's reply text, not a parsed dict.
    return check_claim_coverage(record_summary, policy_summary)

## ReAct Agent Creation

This section creates the ReAct-style agent using the tools and system prompt.

In [None]:
# System prompt for the claims agent: lists the three tools, fixes the order
# in which they must be called, and pins the final answer format. The decision
# labels match the ones check_claim_coverage asks the model to use.
SYSTEM_PROMPT = """
You are an AI assistant designed to process health insurance claims. You have access to the following tools:

1. summarize_patient_record_tool: Takes a patient record string and returns a concise summary.
2. summarize_policy_guideline_tool: Takes a policy guidelines string and returns a summary of the insurance policy.
3. check_claim_coverage_tool: Takes a record summary and policy summary and determines if the claim should be approved or routed for review.

Follow this exact workflow for each claim:
1. First, summarize the patient record using summarize_patient_record_tool
2. Next, get the relevant insurance policy summary using summarize_policy_guideline_tool
3. Finally, check if the claim should be covered using check_claim_coverage_tool

Your final response must follow this format:
Decision: [APPROVE/ROUTE FOR REVIEW]
Reason: [clear explanation of the decision based on the record and policy]

Make sure to complete all steps in order for every claim you process.
"""

def create_insurance_agent():
    """
    Build the insurance claims agent from the three tool wrappers and SYSTEM_PROMPT.

    NOTE(review): ``create_react_agent`` is neither imported nor defined in
    the visible source, and no language model is passed to it. Confirm which
    library supplies it and that it accepts the ``tools``, ``system_prompt``
    and ``single_agent`` keywords used here.

    Returns:
        The object returned by ``create_react_agent``; callers in this file
        invoke ``.run(...)`` on it (presumably an AgentExecutor -- verify).
    """
    return create_react_agent(
        tools=[
            summarize_patient_record_tool,
            summarize_policy_guideline_tool,
            check_claim_coverage_tool,
        ],
        system_prompt=SYSTEM_PROMPT,
        single_agent=True,
    )

## Agent Evaluation

This section defines the functions that run the agent over the evaluation dataset and the test dataset and save the results to disk.

In [None]:
def run_agent_validation():
    """
    Run the agent over every record in the evaluation dataset.

    Each record is expected to carry ``patient_record`` and
    ``policy_guidelines`` entries. A record that fails for any reason is kept
    in the output with a "ROUTE FOR REVIEW" decision and the error text as its
    reasoning, so one bad record does not stop the run.

    Side effects:
        Writes all results to ``agent_validation_results.json``.

    Returns:
        list: One result dict per evaluation record.
    """
    agent = create_insurance_agent()
    results = []
    for record in evaluation_dataset_data:
        try:
            patient_record = record["patient_record"]
            policy_guidelines = record["policy_guidelines"]

            decision = agent.run({
                "patient_record": patient_record,
                "policy_guidelines": policy_guidelines
            })

            results.append({
                "patient_record": patient_record,
                "policy_guidelines": policy_guidelines,
                "decision": decision
            })
        except Exception as e:
            # Read the fields defensively: if a missing key (or a malformed
            # record) caused the failure, indexing again here would raise out
            # of the handler and abort the whole run.
            fields = record if isinstance(record, dict) else {}
            results.append({
                "patient_record": fields.get("patient_record"),
                "policy_guidelines": fields.get("policy_guidelines"),
                "decision": "ROUTE FOR REVIEW",
                "reasoning": str(e)
            })

    with open("agent_validation_results.json", "w") as f:
        json.dump(results, f, indent=4)

    return results

def _split_agent_decision(agent_output):
    """Return ``(decision, reasoning)`` extracted from an agent result.

    Non-string results are indexed by "decision" and "reasoning". String
    results are scanned for "Decision:" and "Reason:" lines, the final-answer
    format the system prompt asks for; a string with no decision line raises
    ValueError.
    """
    if not isinstance(agent_output, str):
        return agent_output["decision"], agent_output["reasoning"]

    decision = None
    reasoning_parts = []
    collecting_reason = False
    for raw_line in agent_output.splitlines():
        line = raw_line.strip()
        label, separator, value = line.partition(":")
        # Tolerate markdown emphasis around the label, e.g. "**Decision:**".
        key = label.strip(" *").lower() if separator else ""
        if key == "decision":
            decision = value.strip(" *")
            collecting_reason = False
        elif key in ("reason", "reasoning"):
            reasoning_parts = [value.strip(" *")]
            collecting_reason = True
        elif collecting_reason and line:
            # The explanation may wrap over several lines.
            reasoning_parts.append(line)

    if not decision:
        raise ValueError(f"Agent response has no 'Decision:' line: {agent_output!r}")
    return decision, " ".join(part for part in reasoning_parts if part)


def run_final_agent_evaluation():
    """
    Run the agent over every record in the test dataset and write the submission file.

    Each record is expected to carry ``patient_record`` and
    ``policy_guidelines`` entries. The agent's result is split into a decision
    and its reasoning. A record that fails for any reason is kept in the
    output with a "ROUTE FOR REVIEW" decision and the error text as its
    reasoning.

    Side effects:
        Writes ``submission.csv`` with the columns patient_record,
        policy_guidelines, decision, reasoning.

    Returns:
        list: One result dict per test record.
    """
    agent = create_insurance_agent()
    results = []
    for record in test_dataset_data:
        try:
            patient_record = record["patient_record"]
            policy_guidelines = record["policy_guidelines"]

            agent_output = agent.run({
                "patient_record": patient_record,
                "policy_guidelines": policy_guidelines
            })
            # The result may be plain "Decision:/Reason:" text rather than a
            # dict; indexing a string directly would fail for every record.
            decision, reasoning = _split_agent_decision(agent_output)

            results.append({
                "patient_record": patient_record,
                "policy_guidelines": policy_guidelines,
                "decision": decision,
                "reasoning": reasoning
            })
        except Exception as e:
            # Read the fields defensively: if a missing key (or a malformed
            # record) caused the failure, indexing again here would raise out
            # of the handler and abort the whole run.
            fields = record if isinstance(record, dict) else {}
            results.append({
                "patient_record": fields.get("patient_record"),
                "policy_guidelines": fields.get("policy_guidelines"),
                "decision": "ROUTE FOR REVIEW",
                "reasoning": str(e)
            })

    # Model-written text can contain non-ASCII characters; a fixed encoding
    # keeps the write from failing on platforms with a narrower default.
    with open("submission.csv", "w", newline="", encoding="utf-8") as csvfile:
        fieldnames = ["patient_record", "policy_guidelines", "decision", "reasoning"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        writer.writerows(results)

    return results