In [1]:
import numpy as np
import pandas as pd
import os
import csv
import re
from dotenv import load_dotenv
import json
from openai import OpenAI
from urllib.parse import urlparse
import hashlib
import pdfkit
import requests
from playwright.sync_api import sync_playwright
from openai import OpenAI
import json
from bs4 import BeautifulSoup
from urllib.parse import quote_plus
from itertools import product
import traceback

In [2]:
path = '/home/cptaswadu/RESCUE-n8n/insurance'
load_dotenv(dotenv_path=os.path.join(path, ".env"))
openai_api_key = os.getenv("OPEN_AI_API_KEY")
perplexity_api_key = os.getenv("PERPLEXITY_API_KEY")
chatgpt_client = OpenAI(api_key=openai_api_key)

In [3]:
def load_inputs(case_ex_path, ground_truth_path):
    with open(case_ex_path, "r") as f:
        case_ex = json.load(f)

    with open(ground_truth_path, "r") as f:
        ground_truth = json.load(f)

    return case_ex, ground_truth

case_path = "/home/cptaswadu/RESCUE-n8n/insurance/dataset/case_ex.json"
truth_path = "/home/cptaswadu/RESCUE-n8n/insurance/dataset/ground_truth.json"

case_ex, ground_truth = load_inputs(case_path, truth_path)

In [4]:
def clean_json_response(response_text):
    original = response_text.strip()

    # Step 0: Check for hallucinated greeting (Perplexity fallback)
    if "how can I assist you" in original.lower() or "insurance-related questions" in original.lower():
        raise ValueError("Perplexity returned generic assistant response instead of JSON.")

    # Step 1: Try direct parsing
    try:
        return json.loads(original)
    except json.JSONDecodeError:
        pass

    # Step 2: Remove code block wrappers
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", original, flags=re.IGNORECASE).strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass

    # Step 3: Try to extract the first {...} JSON-like block
    match = re.search(r"(\{[\s\S]*?\})", original)
    if match:
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError:
            pass

    raise ValueError("No valid JSON found in the response.")

In [5]:
def find_policy_summarize(patient_info_text, model="chatgpt", chatgpt_client=None, perplexity_api_key=None, max_retries=3):
    prompt = f"""
You are a clinical insurance assistant specializing in genetic testing coverage.

You will be provided with:
1. Patient clinical information (including clinical history, insurance provider, and location).
2. Your task is to first find the most relevant and up-to-date official insurance policy document that addresses **genetic testing coverage** for this patient.
3. Then, read and extract ONLY the key policy criteria that apply to this patient's specific situation.

==== PATIENT INFORMATION ====
{patient_info_text}

Instructions:

🔹 Step 1: Search
- Identify the **most relevant official URL** of the insurance policy document based on the patient’s insurance provider, genetic test type, and state of residence.
- The document must include criteria such as: **medical necessity**, **clinical guidelines**, **age restrictions**, **prior authorization requirements**, or **test-specific coverage rules**.
- If no relevant document is found, respond with: "No policy found."
- If a document is found, return its **direct URL** under: `policy_url`

🔹 Step 2: Extract Relevant Rules
- Read the document at the retrieved `policy_url`.
- Extract and summarize only the meaningful coverage criteria that apply to **this specific patient** based on their clinical profile.
- Focus on:
  - Age requirements
  - Medical necessity definition
  - Clinical guideline adherence (e.g., ACMG, NCCN)
  - Prior authorization requirements
  - Any other genetic test-specific coverage conditions
- Exclude navigation menus, legal disclaimers, and irrelevant sections.
- If no relevant rules are found or the page doesn't load, return: "No relevant coverage criteria found."

Return your answer in the following JSON format:

```json
{{
  "policy_url": "[INSERT POLICY URL OR 'No policy found']",
  "policy_summary": "[INSERT SUMMARY TEXT OR 'No relevant coverage criteria found']"
}}
"""
    messages = [
        {"role": "system", "content": "You are a clinical insurance assistant."},
        {"role": "user", "content": prompt}
    ]

    def call_chatgpt():
        response = chatgpt_client.responses.create(
            model="gpt-4o",
            input=messages,
            tools=[{"type": "web_search_preview"}]
        )
        return response.output_text.strip()

    def call_perplexity():
        headers = {
            "Authorization": f"Bearer {perplexity_api_key}",
            "Content-Type": "application/json"
        }
        data = {
            "model": "sonar-pro",
            "messages": messages,
            "temperature": 0
        }
        url = "https://api.perplexity.ai/chat/completions"
        res = requests.post(url, headers=headers, json=data)
        return res.json()["choices"][0]["message"]["content"].strip()

    for attempt in range(1, max_retries + 1):
        try:
            print(f"🔁 Attempt {attempt} ({model})...")
            result_text = call_perplexity() if model == "perplexity" else call_chatgpt()
            print("🔎 Raw result from LLM:\n", result_text)

            result_json = clean_json_response(result_text)
            if "policy_url" not in result_json or "policy_summary" not in result_json:
                raise ValueError("Missing expected keys in response.")

            return result_json

        except Exception as e:
            print(f"❌ Attempt {attempt} failed: {e}")

    return {
        "policy_url": "No policy found",
        "policy_summary": "No relevant coverage criteria found"
    }

In [6]:
def format_question_block(q, indent=2):
    indent_str = " " * indent
    question_line = f"{q['question']}"

    if q.get("options") == ["Free text answer"]:
        question_line += f"\n{indent_str}(Free text answer allowed.)"
    else:
        question_line += f"\n{indent_str}Options: {', '.join(q['options'])}"

        if "additional_if_yes" in q:
            question_line += f"\n{indent_str}  If 'Yes':"
            for item in q["additional_if_yes"]:
                if isinstance(item, str):
                    question_line += f"\n{indent_str}    - {item}"
                elif isinstance(item, dict):
                    question_line += f"\n{indent_str}    - {format_question_block(item, indent + 6)}"

        if "additional_if_no" in q:
            question_line += f"\n{indent_str}  If 'No':"
            for item in q["additional_if_no"]:
                if isinstance(item, str):
                    question_line += f"\n{indent_str}    - {item}"
                elif isinstance(item, dict):
                    question_line += f"\n{indent_str}    - {format_question_block(item, indent + 6)}"

    return question_line


def format_questions(questions_list):
    return "\n\n".join([
        f"{q['id']}. {format_question_block(q)}"
        for q in questions_list
    ])


In [7]:
def run_qna(case_id, patient_info_text, policy_url, policy_summary, questions_list,
            qna_model="chatgpt", chatgpt_client=None, perplexity_api_key=None,
            search_model="chatgpt"):

    questions_formatted = format_questions(questions_list)

    prompt = f"""
You are a clinical insurance assistant specializing in genetic testing coverage policies.

You will be given:
1. Patient clinical information (very important for decision making)
2. Summarized policy coverage criteria text (use this when available)

Instructions:
- If a useful policy summary is provided, prioritize it when answering.
- If the policy summary says "No relevant coverage criteria found", you must read and extract policy content directly from the policy_url using your web search tool.
- For each question:
    - Answer "Yes" or "No" based on the policy criteria and patient information.
    - If the question is a Free text question, provide a free text answer.
    - Strictly choose the answer from the options provided.
    - If options are provided, choose ONLY from those options.
    - If the question says "(Free text answer allowed)", you may write your answer freely.
    - If the question says "If Yes, ALSO select from ..." and you answered "Yes", you MUST also select from those follow-up options.
    - If the question says "If No, ALSO select from ..." and you answered "No", you MUST also select from those follow-up options.
- Use the patient's clinical context carefully if the policy is vague.
- Output answers in JSON format ONLY, with no explanation.

==== PATIENT INFORMATION ====
{patient_info_text}

==== SUMMARIZED POLICY COVERAGE CRITERIA (from URL: {policy_url}) ====
{policy_summary}

==== QUESTIONS ====
{questions_formatted}

Output your answers in JSON format only.
"""

    messages = [
        {"role": "system", "content": "You are a clinical insurance assistant."},
        {"role": "user", "content": prompt}
    ]

    def call_chatgpt():
        response = chatgpt_client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            temperature=0
        )
        return response.choices[0].message.content.strip()

    def call_chatgpt_websearch():
        response = chatgpt_client.responses.create(
            model="gpt-4o",
            input=messages,
            tools=[{"type": "web_search_preview"}]
        )
        return response.output_text.strip()

    def call_perplexity():
        headers = {
            "Authorization": f"Bearer {perplexity_api_key}",
            "Content-Type": "application/json"
        }
        data = {
            "model": "sonar-pro",
            "messages": messages,
            "temperature": 0
        }
        url = "https://api.perplexity.ai/chat/completions"
        res = requests.post(url, headers=headers, json=data)
        if res.status_code == 200:
            return res.json()["choices"][0]["message"]["content"].strip()
        else:
            raise Exception(f"Perplexity error: {res.status_code} - {res.text}")

    try:
        print(f"🧠 Running QnA ({qna_model})...")
        if qna_model == "chatgpt" and policy_summary.lower().strip() == "no relevant coverage criteria found":
            result_content = call_chatgpt_websearch()
        elif qna_model == "chatgpt":
            result_content = call_chatgpt()
        else:
            result_content = call_perplexity()
        result_json = clean_json_response(result_content)

        final_result = {}
        for k, v in result_json.items():
            if k == "policy_url":
                continue
            if "_selection" in k or "_details" in k:
                base_key = k.replace("_selection", "").replace("_details", "")
                final_result[f"{base_key}_followup"] = [v] if isinstance(v, str) else v
            else:
                final_result[k] = v

        result_dir = f"/home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna/{search_model}_{qna_model}"
        os.makedirs(result_dir, exist_ok=True)
        filename = os.path.join(result_dir, f"{case_id}_qna_result.json")

        with open(filename, "w") as f:
            json.dump(final_result, f, indent=2)

        print(f"✅ QnA result saved to {filename}")

    except Exception as e:
        print("❗ JSON parsing error:", e)
        final_result = {
            "error": "JSON parsing failed",
            "raw_content": result_content
        }

    print("QnA Result JSON:", final_result)
    return final_result

In [8]:
questions_file_path = "/home/cptaswadu/RESCUE-n8n/insurance/dataset/Insurance_Genetic_Testing_QA.json"

with open(questions_file_path, "r") as f:
    questions_data = json.load(f)

questions_list = questions_data["questions"]

In [9]:
def run_all_model_combinations(case_ex, questions_list, chatgpt_client, perplexity_api_key):
    model_options = ["chatgpt", "perplexity"]
    combinations = list(product(model_options, repeat=2))

    for search_model, qna_model in combinations:
        print(f"\n🚀 Starting experiments for: {search_model}_{qna_model}\n")

        for case in case_ex:
            case_id = case["id"]
            patient_info = case["patient_info"]

            print(f"\n=== Running for {case_id} ===")

            try:
                policy_data = find_policy_summarize(
                    patient_info_text=patient_info,
                    model=search_model,
                    chatgpt_client=chatgpt_client,
                    perplexity_api_key=perplexity_api_key
                )

                policy_url = policy_data.get("policy_url", "No policy found")
                policy_summary = policy_data.get("policy_summary", "No relevant coverage criteria found")

                run_qna(
                    case_id=case_id,
                    patient_info_text=patient_info,
                    policy_url=policy_url,
                    policy_summary=policy_summary,
                    questions_list=questions_list,
                    qna_model=qna_model,
                    chatgpt_client=chatgpt_client,  
                    perplexity_api_key=perplexity_api_key,
                    search_model=search_model
                )

            except Exception as e:
                print(f"❌ Failed for {case_id} in {search_model}_{qna_model}")
                traceback.print_exc()

run_all_model_combinations(
    case_ex=case_ex,
    questions_list=questions_list,
    chatgpt_client=chatgpt_client,  
    perplexity_api_key=perplexity_api_key
)



🚀 Starting experiments for: chatgpt_chatgpt


=== Running for Case1 ===
🔁 Attempt 1 (chatgpt)...
🔎 Raw result from LLM:
 ```json
{
  "policy_url": "https://www.uhcprovider.com/en/prior-auth-advance-notification/genetic-molecular-lab.html",
  "policy_summary": "UnitedHealthcare's policy considers Whole Exome Sequencing (WES) medically necessary for patients with unexplained congenital or neurodevelopmental disorders when:\n\n- **Age Requirement**: The patient is under 18 years of age.\n\n- **Medical Necessity**: A genetic etiology is strongly suspected based on clinical presentation, including:\n  - Multiple congenital anomalies affecting different organ systems.\n  - Moderate to profound intellectual disability diagnosed by 18 years of age.\n  - Global developmental delay.\n  - Epileptic encephalopathy with onset before 3 years of age.\n  - Two or more of the following: congenital anomaly, significant hearing or visual impairment diagnosed by 18 years of age, laboratory abnormalities 

In [10]:
def merge_qna_jsons_to_csv(folder_path, output_csv_path):
    all_data = []

    for file in os.listdir(folder_path):
        if file.endswith("_qna_result.json"):
            case_id = file.replace("_qna_result.json", "")
            json_path = os.path.join(folder_path, file)

            with open(json_path, "r") as f:
                try:
                    result = json.load(f)
                    flat_result = {"case_id": case_id}

                    for k, v in result.items():
                        if isinstance(v, list):
                            flat_result[k] = "; ".join(map(str, v))
                        else:
                            flat_result[k] = v

                    all_data.append(flat_result)
                except Exception as e:
                    print(f"❗ Failed to parse {file}: {e}")

    if all_data:
        df = pd.DataFrame(all_data)
        df.to_csv(output_csv_path, index=False)
        print(f"✅ Merged CSV saved to: {output_csv_path}")
    else:
        print(f"⚠️ No valid QnA result files found in: {folder_path}")

def merge_all_combinations_to_csv(base_dir):
    for folder_name in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder_name)
        if os.path.isdir(folder_path):
            output_csv = os.path.join(base_dir, f"{folder_name}.csv")
            merge_qna_jsons_to_csv(folder_path, output_csv)


merge_all_combinations_to_csv(
    base_dir="/home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna"
)

✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna/chatgpt_perplexity.csv
✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna/chatgpt_chatgpt.csv
✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna/perplexity_chatgpt.csv
✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna/perplexity_perplexity.csv


In [11]:
def load_converted_results_from_folder(folder_path):
    converted_results = {}
    for file in os.listdir(folder_path):
        if file.endswith("_qna_result.json"):
            case_id = file.replace("_qna_result.json", "")
            file_path = os.path.join(folder_path, file)
            with open(file_path, "r") as f:
                try:
                    data = json.load(f)
                except json.JSONDecodeError as e:
                    print(f"❗ JSON decode error in {file}: {e}")
                    continue

                clean = {}
                for k, v in data.items():
                    if k.endswith("_followup"):
                        clean[k] = v if isinstance(v, list) else [v]
                    else:
                        clean[k] = v

                converted_results[case_id] = clean
    return converted_results

In [12]:
def evaluate_qna_result(case_id, predicted_result, gold_result, folder_path=None):
    records = []
    correct_count = 0
    total_count = 0

    for qid in gold_result:
        if not qid.startswith("Q") or qid == "policy_url" or qid == "Q17" or "_followup" in qid:
            continue

        pred_answer = predicted_result.get(qid, "")
        gold_answer = gold_result.get(qid, "")

        if isinstance(pred_answer, list):
            pred_answer = ", ".join(pred_answer)
        if isinstance(gold_answer, list):
            gold_answer = ", ".join(gold_answer)

        pred_answer = pred_answer.strip()
        gold_answer = gold_answer.strip()

        is_correct = pred_answer == gold_answer
        score = 1 if is_correct else 0

        records.append({
            "Case": case_id,
            "Question": qid,
            "Predicted": pred_answer,
            "Gold": gold_answer,
            "Score": score
        })

        total_count += 1
        correct_count += score

        followup_key = qid + "_followup"
        pred_followup = predicted_result.get(followup_key, None)
        gold_followup = gold_result.get(followup_key, None)

        if is_correct and gold_followup is not None:
            def normalize(ans):
                if ans is None:
                    return "None"
                if isinstance(ans, list):
                    return ", ".join([a if isinstance(a, str) else a.get("answer", str(a)) for a in ans])
                return ans if isinstance(ans, str) else str(ans)

            pred_followup_norm = normalize(pred_followup)
            gold_followup_norm = normalize(gold_followup)

            pred_set = set(pred_followup_norm.split(", "))
            gold_set = set(gold_followup_norm.split(", "))

            followup_score = 1 if pred_set & gold_set else 0

            records.append({
                "Case": case_id,
                "Question": followup_key,
                "Predicted": pred_followup_norm,
                "Gold": gold_followup_norm,
                "Score": followup_score
            })

            total_count += 1
            correct_count += followup_score

    accuracy = correct_count / total_count * 100 if total_count > 0 else 0

    records.append({
        "Case": case_id,
        "Question": "TOTAL",
        "Predicted": f"Correct: {correct_count}",
        "Gold": f"Incorrect: {total_count - correct_count}",
        "Score": f"Accuracy: {accuracy:.2f}%"
    })

    df = pd.DataFrame(records)

    # ✅ Save CSV if folder_path is given
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)
        csv_path = os.path.join(folder_path, f"evaluation_{case_id}.csv")
        df.to_csv(csv_path, index=False)
        print(f"📄 Saved evaluation to {csv_path}")

    return df, accuracy

In [13]:
def evaluate_all_folders_with_summary(base_dir, gold_answers, summary_output_csv):
    eval_output_dir = os.path.join(base_dir, "Evaluation")
    os.makedirs(eval_output_dir, exist_ok=True)

    summary_records = []

    for folder_name in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue

        print(f"\n📊 Evaluating folder: {folder_name}")

        converted_results = load_converted_results_from_folder(folder_path)
        all_dfs = []
        accuracies = []

        for case_id, pred_result in converted_results.items():
            gold_result = gold_answers.get(case_id)
            if gold_result is None:
                continue

            df_case, acc = evaluate_qna_result(case_id, pred_result, gold_result)
            all_dfs.append(df_case)
            accuracies.append(acc)

        if all_dfs:
            merged_df = pd.concat(all_dfs, ignore_index=True)
            eval_csv_path = os.path.join(eval_output_dir, f"{folder_name}.csv")
            merged_df.to_csv(eval_csv_path, index=False)
            print(f"✅ Saved: {eval_csv_path}")

            
            summary_records.append({
                "Model_Combination": folder_name,
                "Mean_Accuracy": f"{sum(accuracies)/len(accuracies):.2f}%" if accuracies else "N/A"
            })

    
    if summary_records:
        summary_df = pd.DataFrame(summary_records)
        os.makedirs(os.path.dirname(summary_output_csv), exist_ok=True)
        summary_df.to_csv(summary_output_csv, index=False)
        print(f"\n✅ Summary saved to: {summary_output_csv}")
        print(summary_df)

In [14]:
evaluate_all_folders_with_summary(
    base_dir="/home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna",
    gold_answers=ground_truth,
    summary_output_csv="/home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna/Evaluation/summary_accuracy.csv"
)



📊 Evaluating folder: chatgpt_perplexity
✅ Saved: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna/Evaluation/chatgpt_perplexity.csv

📊 Evaluating folder: Evaluation

📊 Evaluating folder: chatgpt_chatgpt
✅ Saved: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna/Evaluation/chatgpt_chatgpt.csv

📊 Evaluating folder: perplexity_chatgpt
✅ Saved: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna/Evaluation/perplexity_chatgpt.csv

📊 Evaluating folder: perplexity_perplexity
✅ Saved: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna/Evaluation/perplexity_perplexity.csv

✅ Summary saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/search_summarize_qna/Evaluation/summary_accuracy.csv
       Model_Combination Mean_Accuracy
0     chatgpt_perplexity        78.95%
1        chatgpt_chatgpt        75.00%
2     perplexity_chatgpt        70.0