# **Intelligent Form Agent**

## Installation

In [2]:
!pip install transformers huggingface_hub faker

Collecting faker
  Downloading faker-37.11.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.11.0-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-37.11.0


## Imports

In [3]:
from faker import Faker
from typing import List, Dict
from huggingface_hub import InferenceClient
from google.colab import userdata
from typing import defaultdict
import pandas as pd
import random
import os
import re
import json
import yaml

## Data Creation for insurance claim

In [6]:
fake = Faker()

In [7]:
OUTPUT_DIR = 'data\claim'
CONFIG_PATH = "/content/config.yaml"
MIN_CLAIMS_PER_PATIENT = 2
MAX_CLAIMS_PER_PATIENT = 6

fake = Faker()
with open(CONFIG_PATH, "r") as file:
    data = yaml.safe_load(file)

ICD_CODES = data["ICD_CODES"]
CPT_CODES = data["CPT_CODES"]
DRUG_NAMES = data["DRUG_NAMES"]
PROVIDER_NAMES = data["PROVIDER_NAMES"]


def create_patient_profile(patient_id):
    return {
        "patient_id": patient_id,
        "first_name": fake.first_name(),
        "last_name": fake.last_name(),
        "date_of_birth": fake.date_between(start_date="-70y", end_date="-20y").isoformat(),
        "policy_number": fake.bothify(text='P#####-##')
    }


def generate_claim(patient_profile, base_diagnosis, claim_number_offset):
    procedure_key = "Office Visit (Established Patient)"
    provider = random.choice(PROVIDER_NAMES)

    if base_diagnosis == "Hypertension":
        procedure_key = "Echocardiogram"
        provider = PROVIDER_NAMES[2]
    elif base_diagnosis == "Migraine":
        procedure_key = "MRI Brain w/o Contrast"
        provider = PROVIDER_NAMES[1]
    elif base_diagnosis == "Asthma":
        procedure_key = "Spirometry (Lung Function Test)"
        provider = PROVIDER_NAMES[4]
    elif base_diagnosis == "Type 2 Diabetes":
        procedure_key = "Blood Glucose Test"
        provider = PROVIDER_NAMES[0]

    procedure_code = CPT_CODES[procedure_key]

    visit_date = fake.date_between(
        start_date=f"-{claim_number_offset}M", end_date=f"-{claim_number_offset}M").isoformat()

    billed_amount = round(random.uniform(500.0, 3500.0), 2)
    allowed_amount = round(billed_amount * random.uniform(0.65, 0.85), 2)
    copay = random.choice([25.0, 50.0, 75.0])
    insurance_paid = round(allowed_amount - copay, 2)

    summary = (
        f"Claim for {base_diagnosis} ({ICD_CODES[base_diagnosis]}). "
        f"Patient presented with symptoms requiring {procedure_key} ({procedure_code}). "
        f"Recommended medication: {random.choice(DRUG_NAMES)}."
    )

    clinical_note = (
        f"CLINICAL NOTE: Patient {patient_profile['first_name']} {patient_profile['last_name']} (Policy: {patient_profile['policy_number']}) "
        f"was seen today, {visit_date}, by {provider}. The main subjective complaint was a "
        f"recurrent flare-up of their **{base_diagnosis}** symptoms, which are generally well-managed. "
        f"{fake.paragraph(nb_sentences=2)} Assessment determined the necessity of a diagnostic "
        f"procedure to confirm the severity: **{procedure_key}** (CPT: {procedure_code}). "
        f"The diagnosis code assigned is **{ICD_CODES[base_diagnosis]}**. The patient was advised "
        f"on the necessity of lifestyle modifications and will be starting the new medication, "
        f"{random.choice(DRUG_NAMES)}, immediately. Total billed charges for this visit are ${billed_amount:.2f}. "
        f"All staff were informed regarding the high priority of the patient's next appointment."
    )

    structured_details = {
        "claim_id": fake.bothify(text='CLM#########'),
        "claim_date": visit_date,
        "patient_info": patient_profile,
        "provider_name": provider,
        "primary_diagnosis": base_diagnosis,
        "icd_code": ICD_CODES[base_diagnosis],
        "procedure_description": procedure_key,
        "cpt_code": procedure_code,
        "financials": {
            "billed_amount": billed_amount,
            "allowed_amount": allowed_amount,
            "copay": copay,
            "insurance_paid": insurance_paid
        }
    }

    return structured_details, clinical_note, summary



def save_claim_files(structured_details, unstructured_note, summary):

    patient_id = structured_details['patient_info']['patient_id']
    claim_id = structured_details['claim_id']

    claim_dir = os.path.join(OUTPUT_DIR, patient_id, claim_id)

    os.makedirs(claim_dir, exist_ok=True)

    with open(os.path.join(claim_dir, 'claim_details.json'), 'w') as f:
        json.dump(structured_details, f, indent=4)

    text_content = (
        "---CLINICAL NOTE ---\n"
        f"{unstructured_note}\n\n"
        "---SUMMARY ---\n"
        f"{summary}\n"
    )

    with open(os.path.join(claim_dir, 'claim_text_data.txt'), 'w') as f:
        f.write(text_content)


patient_profiles = {
    "PA-12345": create_patient_profile("PA-12345"),
    "PB-24680": create_patient_profile("PB-24680"),
    "PC-13579": create_patient_profile("PC-13579"),
    "PD-09876": create_patient_profile("PD-09876"),
    "PE-54321": create_patient_profile("PE-54321"),
}

patient_conditions = {
    "PA-12345": "Hypertension",
    "PB-24680": "Migraine",
    "PC-13579": "Asthma",
    "PD-09876": "Type 2 Diabetes",
    "PE-54321": "Acute Sinusitis",
}

total_claims_generated = 0
all_icd_keys = list(ICD_CODES.keys())
print("--- STARTING DYNAMIC TEST DATA GENERATION ---")
for patient_id, base_condition in patient_conditions.items():
    patient = patient_profiles[patient_id]

    num_claims = random.randint(MIN_CLAIMS_PER_PATIENT, MAX_CLAIMS_PER_PATIENT)
    print(
        f"Generating {num_claims} claims for Patient {patient_id} ({base_condition})...")

    for i in range(1, num_claims + 1):
        current_diagnosis = base_condition
        if random.random() < 0.25 and len(all_icd_keys) > 1:
            available_diagnoses = [d for d in all_icd_keys if d != base_condition]
            if available_diagnoses:
                current_diagnosis = random.choice(available_diagnoses)
        structured, unstructured, summary = generate_claim(
            patient, current_diagnosis, num_claims - i + 1)
        save_claim_files(structured, unstructured, summary)
        total_claims_generated += 1

print("--- FILE GENERATION COMPLETE ---")
print(f"Total Claims Generated: {total_claims_generated}")
print(f"Data is organized in the '{OUTPUT_DIR}' directory.")
print("\nExample Path Structure (Note the single text file):")
print(f"  {OUTPUT_DIR}/PA-12345/CLM#########/claim_details.json")
print(f"  {OUTPUT_DIR}/PA-12345/CLM#########/claim_text_data.txt")


--- STARTING DYNAMIC TEST DATA GENERATION ---
Generating 3 claims for Patient PA-12345 (Hypertension)...
Generating 4 claims for Patient PB-24680 (Migraine)...
Generating 3 claims for Patient PC-13579 (Asthma)...
Generating 5 claims for Patient PD-09876 (Type 2 Diabetes)...
Generating 6 claims for Patient PE-54321 (Acute Sinusitis)...
--- FILE GENERATION COMPLETE ---
Total Claims Generated: 21
Data is organized in the 'data\claim' directory.

Example Path Structure (Note the single text file):
  data\claim/PA-12345/CLM#########/claim_details.json
  data\claim/PA-12345/CLM#########/claim_text_data.txt


  OUTPUT_DIR = 'data\claim'


## Extractive QA

Define Model

In [8]:
MODEL_NAME = "deepset/roberta-large-squad2"

Setup Token

Setup Client

In [10]:
client = InferenceClient(
    provider="hf-inference",
    api_key=userdata.get('HF_TOKEN'),
)

Load the form content

In [19]:
parent_dir = "/content/data\claim/PA-12345/"

  parent_dir = "/content/data\claim/PA-12345/"


In [21]:
folders = os.listdir(parent_dir)

data_path = os.path.join(parent_dir, folders[0])
data_path

'/content/data\\claim/PA-12345/CLM838825116'

In [22]:
print(os.path.exists(data_path))

True


In [23]:
files = os.listdir(data_path)
files

['claim_details.json', 'claim_text_data.txt']

In [24]:
file_path_details = os.path.join(data_path,files[0])
file_path_details

'/content/data\\claim/PA-12345/CLM838825116/claim_details.json'

In [25]:
with open(file_path_details,'r',encoding="utf-8") as file:
  content_details = file.read()
content_details

'{\n    "claim_id": "CLM838825116",\n    "claim_date": "2025-08-18",\n    "patient_info": {\n        "patient_id": "PA-12345",\n        "first_name": "Jesse",\n        "last_name": "Waters",\n        "date_of_birth": "1960-05-07",\n        "policy_number": "P78831-16"\n    },\n    "provider_name": "Dr. Ava Sharma (Cardiology)",\n    "primary_diagnosis": "Hypertension",\n    "icd_code": "I10",\n    "procedure_description": "Echocardiogram",\n    "cpt_code": "93306",\n    "financials": {\n        "billed_amount": 3425.16,\n        "allowed_amount": 2252.49,\n        "copay": 75.0,\n        "insurance_paid": 2177.49\n    }\n}'

In [26]:
file_path_text = os.path.join(data_path,files[1])
file_path_text

'/content/data\\claim/PA-12345/CLM838825116/claim_text_data.txt'

In [27]:
with open(file_path_text,'r',encoding="utf-8") as file:
  content_text = file.read()
content_text

"---CLINICAL NOTE ---\nCLINICAL NOTE: Patient Jesse Waters (Policy: P78831-16) was seen today, 2025-08-18, by Dr. Ava Sharma (Cardiology). The main subjective complaint was a recurrent flare-up of their **Hypertension** symptoms, which are generally well-managed. Student style yeah rule doctor among audience few. Assessment determined the necessity of a diagnostic procedure to confirm the severity: **Echocardiogram** (CPT: 93306). The diagnosis code assigned is **I10**. The patient was advised on the necessity of lifestyle modifications and will be starting the new medication, Lisinopril (20mg), immediately. Total billed charges for this visit are $3425.16. All staff were informed regarding the high priority of the patient's next appointment.\n\n---SUMMARY ---\nClaim for Hypertension (I10). Patient presented with symptoms requiring Echocardiogram (93306). Recommended medication: Atorvastatin (10mg).\n"

In [28]:
combined_details = content_details + content_text
combined_details

'{\n    "claim_id": "CLM838825116",\n    "claim_date": "2025-08-18",\n    "patient_info": {\n        "patient_id": "PA-12345",\n        "first_name": "Jesse",\n        "last_name": "Waters",\n        "date_of_birth": "1960-05-07",\n        "policy_number": "P78831-16"\n    },\n    "provider_name": "Dr. Ava Sharma (Cardiology)",\n    "primary_diagnosis": "Hypertension",\n    "icd_code": "I10",\n    "procedure_description": "Echocardiogram",\n    "cpt_code": "93306",\n    "financials": {\n        "billed_amount": 3425.16,\n        "allowed_amount": 2252.49,\n        "copay": 75.0,\n        "insurance_paid": 2177.49\n    }\n}---CLINICAL NOTE ---\nCLINICAL NOTE: Patient Jesse Waters (Policy: P78831-16) was seen today, 2025-08-18, by Dr. Ava Sharma (Cardiology). The main subjective complaint was a recurrent flare-up of their **Hypertension** symptoms, which are generally well-managed. Student style yeah rule doctor among audience few. Assessment determined the necessity of a diagnostic proc

Remove unnecssory white spaces

In [29]:
content = combined_details.strip()

Question to ask

In [30]:
question = "Who is the doctor?"

Generate Answer

In [31]:
answer = client.question_answering(
    question=question,
    context=content,
    model=MODEL_NAME,
)

In [32]:
MODEL_NAME

'deepset/roberta-large-squad2'

In [33]:
content

'{\n    "claim_id": "CLM838825116",\n    "claim_date": "2025-08-18",\n    "patient_info": {\n        "patient_id": "PA-12345",\n        "first_name": "Jesse",\n        "last_name": "Waters",\n        "date_of_birth": "1960-05-07",\n        "policy_number": "P78831-16"\n    },\n    "provider_name": "Dr. Ava Sharma (Cardiology)",\n    "primary_diagnosis": "Hypertension",\n    "icd_code": "I10",\n    "procedure_description": "Echocardiogram",\n    "cpt_code": "93306",\n    "financials": {\n        "billed_amount": 3425.16,\n        "allowed_amount": 2252.49,\n        "copay": 75.0,\n        "insurance_paid": 2177.49\n    }\n}---CLINICAL NOTE ---\nCLINICAL NOTE: Patient Jesse Waters (Policy: P78831-16) was seen today, 2025-08-18, by Dr. Ava Sharma (Cardiology). The main subjective complaint was a recurrent flare-up of their **Hypertension** symptoms, which are generally well-managed. Student style yeah rule doctor among audience few. Assessment determined the necessity of a diagnostic proc

Print Answer

In [34]:
answer

QuestionAnsweringOutputElement(answer='Dr. Ava Sharma', end=303, score=0.8114892244338989, start=289)

## Summarize

Summarization Model

In [35]:
SUMMARIZATION_MODEL = "facebook/bart-large-cnn"

Setup API

In [36]:
client = InferenceClient(
    provider="hf-inference",
    api_key=userdata.get('HF_TOKEN'),
)

Open file and read content

In [38]:
combined_details

'{\n    "claim_id": "CLM838825116",\n    "claim_date": "2025-08-18",\n    "patient_info": {\n        "patient_id": "PA-12345",\n        "first_name": "Jesse",\n        "last_name": "Waters",\n        "date_of_birth": "1960-05-07",\n        "policy_number": "P78831-16"\n    },\n    "provider_name": "Dr. Ava Sharma (Cardiology)",\n    "primary_diagnosis": "Hypertension",\n    "icd_code": "I10",\n    "procedure_description": "Echocardiogram",\n    "cpt_code": "93306",\n    "financials": {\n        "billed_amount": 3425.16,\n        "allowed_amount": 2252.49,\n        "copay": 75.0,\n        "insurance_paid": 2177.49\n    }\n}---CLINICAL NOTE ---\nCLINICAL NOTE: Patient Jesse Waters (Policy: P78831-16) was seen today, 2025-08-18, by Dr. Ava Sharma (Cardiology). The main subjective complaint was a recurrent flare-up of their **Hypertension** symptoms, which are generally well-managed. Student style yeah rule doctor among audience few. Assessment determined the necessity of a diagnostic proc

In [39]:
len(combined_details)

1516

Create Summarization

In [40]:
result = client.summarization(
    text=combined_details,
    model=SUMMARIZATION_MODEL,
)

In [41]:
len(result.summary_text)

240

Print Result

In [42]:
result

SummarizationOutput(summary_text='Claim for Hypertension (I10). Patient presented with symptoms requiring Echocardiogram (93306) Recommended medication: Atorvastatin (10mg). Total billed charges for this visit are $3425.16. Student style yeah rule doctor among audience few.')

In [43]:
result.summary_text

'Claim for Hypertension (I10). Patient presented with symptoms requiring Echocardiogram (93306) Recommended medication: Atorvastatin (10mg). Total billed charges for this visit are $3425.16. Student style yeah rule doctor among audience few.'

## Holistic Insight From multiple claim of same patient

In [94]:
question = "What is the total out-of-pocket cost (copay) for patient PE-54321 for all claims filed this year?"

In [92]:
patient_id = "PE-54321"

In [45]:
base_dir = "/content/data\claim"

  base_dir = "/content/data\claim"


In [93]:
patient_dir = os.path.join(base_dir, patient_id)
patient_dir

'/content/data\\claim/PE-54321'

In [47]:
all_claims = []

In [48]:
for claim_id in os.listdir(patient_dir):
    claim_path = os.path.join(patient_dir, claim_id)

    if os.path.isdir(claim_path):
        json_file = os.path.join(claim_path, 'claim_details.json')
        if os.path.exists(json_file):
            try:
                with open(json_file, 'r') as f:
                    claim_data = json.load(f)
                    all_claims.append(claim_data)
            except json.JSONDecodeError:
                print(f"Warning: Could not parse JSON in {json_file}")

all_claims.sort(key=lambda c: c['claim_date'])



In [95]:
all_claims

[{'claim_id': 'CLM252033920',
  'claim_date': '2025-04-18',
  'patient_info': {'patient_id': 'PE-54321',
   'first_name': 'Patricia',
   'last_name': 'Hardy',
   'date_of_birth': '1957-09-13',
   'policy_number': 'P99179-78'},
  'provider_name': 'Central City Hospital',
  'primary_diagnosis': 'Acute Sinusitis',
  'icd_code': 'J01.90',
  'procedure_description': 'Office Visit (Established Patient)',
  'cpt_code': '99214',
  'financials': {'billed_amount': 2172.64,
   'allowed_amount': 1779.11,
   'copay': 25.0,
   'insurance_paid': 1754.11}},
 {'claim_id': 'CLM933602523',
  'claim_date': '2025-05-18',
  'patient_info': {'patient_id': 'PE-54321',
   'first_name': 'Patricia',
   'last_name': 'Hardy',
   'date_of_birth': '1957-09-13',
   'policy_number': 'P99179-78'},
  'provider_name': 'Dr. Eleanor Vance (Internal Medicine)',
  'primary_diagnosis': 'Type 2 Diabetes',
  'icd_code': 'E11.9',
  'procedure_description': 'Blood Glucose Test',
  'cpt_code': '82947',
  'financials': {'billed_amo

In [50]:
total_copay = sum(claim['financials']['copay'] for claim in all_claims)
total_allowed_amount = sum(claim['financials']['allowed_amount'] for claim  in all_claims)
total_insurance_paid = sum(claim['financials']['insurance_paid'] for claim  in all_claims)
providers = set(claim['provider_name'] for claim in all_claims)
diagnosis_counts = defaultdict(int)
for claim in all_claims:
    diagnosis_counts[claim['primary_diagnosis']] += 1

print(f"Print Holistic Report!")
print(f"Total Copay: {total_copay}")
print(f"Total Allowed amount: {total_allowed_amount}")
print(f"Total Insurance paid: {total_insurance_paid}")
print(f"Provider names: {providers}")
print(f"Diagnosis count: {diagnosis_counts}")



Print Holistic Report!
Total Copay: 300.0
Total Allowed amount: 8692.35
Total Insurance paid: 8392.35
Provider names: {'Dr. Marcus Bell (Neurology)', 'Dr. Ava Sharma (Cardiology)', 'Dr. Eleanor Vance (Internal Medicine)', 'Central City Hospital'}
Diagnosis count: defaultdict(<class 'int'>, {'Acute Sinusitis': 3, 'Type 2 Diabetes': 2, 'Hypertension': 1})


**Holistic Insight from Unstructured data using LLM**

In [51]:
# if "emergency" in raw_notes_combined.lower() or "hospital" in raw_notes_combined.lower() or "urgent" in raw_notes_combined.lower():
#     clinical_finding = "The narrative notes mention an episode requiring **urgent** or **emergency** intervention, suggesting a period of instability or complication, despite the ongoing management of the primary condition."
#     risk_level = "High"
# elif claim_count > 4 and len(unique_providers) > 2:
#     clinical_finding = "The patient shows a high utilization pattern, having seen multiple specialists. The condition appears stable, but this indicates complex chronic management across different care settings."
#     risk_level = "Medium-High"
# else:
#     clinical_finding = "The patient's condition ({primary_diagnosis}) appears well-controlled based on the narrative notes, with consistent care from the same primary provider. No acute complications were noted."
#     risk_level = "Low"

In [96]:
patient_dir = f"/content/data\claim/{patient_id}"

  patient_dir = f"/content/data\claim/{patient_id}"


In [97]:
patient_dir

'/content/data\\claim/PE-54321'

In [98]:
for claim_id in os.listdir(patient_dir):
  print(claim_id)

CLM933602523
CLM414997566
CLM548971071
CLM252033920
CLM552152493
CLM835240936


In [99]:
all_notes = []
for claim_id in os.listdir(patient_dir):
  file_path = os.path.join(patient_dir, claim_id, 'claim_text_data.txt')
  if os.path.exists(file_path):
      with open(file_path, 'r') as f:
          all_notes.append(f.read())


In [100]:
all_notes[:2]

["---CLINICAL NOTE ---\nCLINICAL NOTE: Patient Patricia Hardy (Policy: P99179-78) was seen today, 2025-05-18, by Dr. Eleanor Vance (Internal Medicine). The main subjective complaint was a recurrent flare-up of their **Type 2 Diabetes** symptoms, which are generally well-managed. Start rather catch front now positive enjoy. Until season yes right question speech team. Assessment determined the necessity of a diagnostic procedure to confirm the severity: **Blood Glucose Test** (CPT: 82947). The diagnosis code assigned is **E11.9**. The patient was advised on the necessity of lifestyle modifications and will be starting the new medication, Metformin (500mg), immediately. Total billed charges for this visit are $785.68. All staff were informed regarding the high priority of the patient's next appointment.\n\n---SUMMARY ---\nClaim for Type 2 Diabetes (E11.9). Patient presented with symptoms requiring Blood Glucose Test (82947). Recommended medication: Metformin (500mg).\n",
 "---CLINICAL NO

In [101]:
all_claims[:2]

[{'claim_id': 'CLM252033920',
  'claim_date': '2025-04-18',
  'patient_info': {'patient_id': 'PE-54321',
   'first_name': 'Patricia',
   'last_name': 'Hardy',
   'date_of_birth': '1957-09-13',
   'policy_number': 'P99179-78'},
  'provider_name': 'Central City Hospital',
  'primary_diagnosis': 'Acute Sinusitis',
  'icd_code': 'J01.90',
  'procedure_description': 'Office Visit (Established Patient)',
  'cpt_code': '99214',
  'financials': {'billed_amount': 2172.64,
   'allowed_amount': 1779.11,
   'copay': 25.0,
   'insurance_paid': 1754.11}},
 {'claim_id': 'CLM933602523',
  'claim_date': '2025-05-18',
  'patient_info': {'patient_id': 'PE-54321',
   'first_name': 'Patricia',
   'last_name': 'Hardy',
   'date_of_birth': '1957-09-13',
   'policy_number': 'P99179-78'},
  'provider_name': 'Dr. Eleanor Vance (Internal Medicine)',
  'primary_diagnosis': 'Type 2 Diabetes',
  'icd_code': 'E11.9',
  'procedure_description': 'Blood Glucose Test',
  'cpt_code': '82947',
  'financials': {'billed_amo

In [102]:
system_prompt = (
    "You are a Senior Medical Data Analyst. Your task is to provide a holistic, "
    "AI-driven summary for a patient based on their entire claims history. "
    "Synthesize the provided structured facts with the clinical context from the raw notes. "
    "Focus on: 1) Stability of the condition, 2) Financial trends, and 3) Any signs of complication or new issues. "
    "Begin your response with 'HOLISTIC INSIGHT:'."
)

In [103]:
user_query = (
    f"Analyze the following patient history and provide a concise, professional summary:\n\n"
    f"--- STRUCTURED FACTS (Aggregated by Agent) ---\n"
    f"{json.dumps(all_claims, indent=2)}\n\n"
    f"--- RAW CLINICAL NARRATIVES (Full Context) ---\n"
    f"{all_notes}"
)

In [105]:
client = InferenceClient(token=userdata.get("HF_TOKEN"))

In [107]:
messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_query}
  ]

In [164]:
response = client.chat_completion(
        messages=messages,
        model="meta-llama/Meta-Llama-3-70B-Instruct",
        max_tokens=512,
        temperature=0.5,
    )


In [None]:
ai_insight_text = response.choices[0].message.content
ai_insight_text

## Abstractive(Generative) QA

In [110]:
system_prompt = (
        "You are an expert Clinical Data Analyst. Your task is to answer a user's question based *only* on the provided details. "
        "You must provide your response in a strict JSON format with two keys: 'answer' and 'reasoning'.\n"
        "1. The 'answer' should be a direct and concise response to the user's question.\n"
        "2. The 'reasoning' must explain how you found the answer and include the *exact quote* from the text that supports your conclusion."
    )

In [111]:
all_notes

["---CLINICAL NOTE ---\nCLINICAL NOTE: Patient Patricia Hardy (Policy: P99179-78) was seen today, 2025-05-18, by Dr. Eleanor Vance (Internal Medicine). The main subjective complaint was a recurrent flare-up of their **Type 2 Diabetes** symptoms, which are generally well-managed. Start rather catch front now positive enjoy. Until season yes right question speech team. Assessment determined the necessity of a diagnostic procedure to confirm the severity: **Blood Glucose Test** (CPT: 82947). The diagnosis code assigned is **E11.9**. The patient was advised on the necessity of lifestyle modifications and will be starting the new medication, Metformin (500mg), immediately. Total billed charges for this visit are $785.68. All staff were informed regarding the high priority of the patient's next appointment.\n\n---SUMMARY ---\nClaim for Type 2 Diabetes (E11.9). Patient presented with symptoms requiring Blood Glucose Test (82947). Recommended medication: Metformin (500mg).\n",
 "---CLINICAL NO

In [141]:
question = "Provide holistic analysis of this patient"

In [142]:
user_query = (
        f"Based on the following clinical note, please answer my question.\n\n"
        f"--- CLAIM DETAILS ---\n"
        f"{all_claims}\n\n"
        f"--- CLINICAL NOTE ---\n"
        f"{all_notes}\n\n"
        f"--- QUESTION ---\n"
        f"{question}"
    )

In [114]:
client = InferenceClient(token=userdata.get("HF_TOKEN"))
client

<InferenceClient(model='', timeout=None)>

In [144]:
messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_query}
    ]

In [145]:
len(messages[0]['content']) + len(messages[1]['content'])

9420

In [158]:
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "answer_and_reasoning",
        "description": "Return a short final answer and a concise rationale (non-stepwise summary).",
        "schema": {
            "type": "object",
            "properties": {
                "answer": {
                    "type": "string",
                    "description": "The short final answer (direct)."
                },
                "reasoning": {
                    "type": "string",
                    "description": "A concise, non-step-by-step summary explaining why the answer is correct."
                }
            },
            "required": ["answer", "reasoning"],
            "additionalProperties": False
        },
        "strict": True
    }
}


In [159]:
response = client.chat_completion(
            messages=messages,
            model="meta-llama/Meta-Llama-3-70B-Instruct",
            max_tokens=512,
            temperature=0.1, # Low temperature for factual, non-creative responses
            response_format=response_format

        )

In [160]:
llm_output_str = response.choices[0].message.content
llm_output_str

'{\n"answer": "The patient, Patricia Hardy, has multiple chronic conditions, including Acute Sinusitis, Type 2 Diabetes, and Hypertension, which are generally well-managed. She has been seen by multiple providers, including Dr. Eleanor Vance (Internal Medicine), Dr. Ava Sharma (Cardiology), and Dr. Marcus Bell (Neurology), and has undergone various diagnostic procedures, including Blood Glucose Tests, Echocardiograms, and Office Visits. The patient has been prescribed multiple medications, including Metformin, Amoxicillin, Fluticasone Propionate, Lisinopril, and Albuterol HFA, and has been advised on lifestyle modifications.",\n"reasoning": "The answer is based on the analysis of the clinical notes, which provide a comprehensive overview of the patient\'s medical history, diagnoses, and treatments. The notes mention the patient\'s multiple chronic conditions, including Acute Sinusitis (J01.90), Type 2 Diabetes (E11.9), and Hypertension (I10), and the various diagnostic procedures and m

In [161]:
parsed_json = json.loads(llm_output_str)
parsed_json

{'answer': 'The patient, Patricia Hardy, has multiple chronic conditions, including Acute Sinusitis, Type 2 Diabetes, and Hypertension, which are generally well-managed. She has been seen by multiple providers, including Dr. Eleanor Vance (Internal Medicine), Dr. Ava Sharma (Cardiology), and Dr. Marcus Bell (Neurology), and has undergone various diagnostic procedures, including Blood Glucose Tests, Echocardiograms, and Office Visits. The patient has been prescribed multiple medications, including Metformin, Amoxicillin, Fluticasone Propionate, Lisinopril, and Albuterol HFA, and has been advised on lifestyle modifications.',
 'reasoning': "The answer is based on the analysis of the clinical notes, which provide a comprehensive overview of the patient's medical history, diagnoses, and treatments. The notes mention the patient's multiple chronic conditions, including Acute Sinusitis (J01.90), Type 2 Diabetes (E11.9), and Hypertension (I10), and the various diagnostic procedures and medica

In [162]:
parsed_json['reasoning']

"The answer is based on the analysis of the clinical notes, which provide a comprehensive overview of the patient's medical history, diagnoses, and treatments. The notes mention the patient's multiple chronic conditions, including Acute Sinusitis (J01.90), Type 2 Diabetes (E11.9), and Hypertension (I10), and the various diagnostic procedures and medications prescribed. The quotes that support this conclusion include: 'The main subjective complaint was a recurrent flare-up of their **Type 2 Diabetes** symptoms, which are generally well-managed.' (CLINICAL NOTE, 2025-05-18), 'The main subjective complaint was a recurrent flare-up of their **Hypertension** symptoms, which are generally well-managed.' (CLINICAL NOTE, 2025-06-18), and 'The main subjective complaint was a recurrent flare-up of their **Acute Sinusitis** symptoms, which are generally well-managed.' (CLINICAL NOTE, 2025-07-18)."