In [1]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import LanceDB
from langchain_community.embeddings import HuggingFaceEmbeddings
import lancedb
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI


In [2]:
PDF_PATH = "Kenya-ARV-Guidelines-2022-Final-1.pdf"

loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

print(f"Loaded {len(documents)} pages")


Loaded 286 pages


## Chunking

In [3]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=150,
    separators=["\n\n", "\n", ".", " "]
)

chunks = text_splitter.split_documents(documents)
print(f"Created {len(chunks)} chunks")


Created 1013 chunks


In [4]:
documents

[Document(page_content=' \n \n', metadata={'source': 'Kenya-ARV-Guidelines-2022-Final-1.pdf', 'page': 0}),
 Document(page_content='', metadata={'source': 'Kenya-ARV-Guidelines-2022-Final-1.pdf', 'page': 1}),
 Document(page_content='  \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \nKenya HIV Prevention and Treatment Guidelines, 2022  \n \n2022 Edition  \n ', metadata={'source': 'Kenya-ARV-Guidelines-2022-Final-1.pdf', 'page': 2}),
 Document(page_content=' \n  \n \n \n \n \n \n \n \n \n \n© National AIDS & STI Control Program 2022  \n \nThis guideline document is a publication of the National AIDS & STI Control Program, Ministry of Health \nKenya. No part of this publication may be reproduced, distributed, or transmitted in any form or by any \nmeans, including photocopying or recording, without the prior written permission of the National AIDS and \nSTI Contro l Program (NASCOP), Ministry of Health Kenya, except for non -commercial uses permitted by \ncopyright la

In [5]:
chunks

[Document(page_content='Kenya HIV Prevention and Treatment Guidelines, 2022  \n \n2022 Edition', metadata={'source': 'Kenya-ARV-Guidelines-2022-Final-1.pdf', 'page': 2}),
 Document(page_content='© National AIDS & STI Control Program 2022  \n \nThis guideline document is a publication of the National AIDS & STI Control Program, Ministry of Health \nKenya. No part of this publication may be reproduced, distributed, or transmitted in any form or by any \nmeans, including photocopying or recording, without the prior written permission of the National AIDS and \nSTI Contro l Program (NASCOP), Ministry of Health Kenya, except for non -commercial uses permitted by \ncopyright law.  \n \nKenya HIV Prevention and Treatment Guidelines, 2022  edition contain relevant information required by \nhealthcare providers in the use of ARVs as of the date of issue. All reasonable precautions have been taken \nby NASCOP to verify the information contained in this guideline document.', metadata={'source': '

## Chunking wit LLM

In [6]:
from dotenv import load_dotenv
load_dotenv()  # OPENAI_API_KEY from .env


In [7]:
from openai import OpenAI

openai_client = OpenAI()


def llm(prompt, model='gpt-4o-mini'):
    messages = [
        {"role": "user", "content": prompt}
    ]

    response = openai_client.responses.create(
        model='gpt-4o-mini',
        input=messages
    )

    return response.output_text

In [8]:
prompt_template = """
Split the provided document into logical sections
that make sense for a Q&A system.

Each section should be self-contained and cover
a specific topic or concept.

<DOCUMENT>
{document}
</DOCUMENT>

Use this format:

## Section Name

Section content with all relevant details

---

## Another Section Name

Another section content

---
""".strip()

In [9]:
# def intelligent_chunking(text):
#     prompt = prompt_template.format(document=text)
#     response = llm(prompt)
#     sections = response.split('---')
#     sections = [s.strip() for s in sections if s.strip()]
#     return sections

In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def intelligent_chunking(text):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
        separators=["\n\n", "\n", ".", " "]
    )
    return splitter.split_text(text)

In [11]:
documents

[Document(page_content=' \n \n', metadata={'source': 'Kenya-ARV-Guidelines-2022-Final-1.pdf', 'page': 0}),
 Document(page_content='', metadata={'source': 'Kenya-ARV-Guidelines-2022-Final-1.pdf', 'page': 1}),
 Document(page_content='  \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \nKenya HIV Prevention and Treatment Guidelines, 2022  \n \n2022 Edition  \n ', metadata={'source': 'Kenya-ARV-Guidelines-2022-Final-1.pdf', 'page': 2}),
 Document(page_content=' \n  \n \n \n \n \n \n \n \n \n \n© National AIDS & STI Control Program 2022  \n \nThis guideline document is a publication of the National AIDS & STI Control Program, Ministry of Health \nKenya. No part of this publication may be reproduced, distributed, or transmitted in any form or by any \nmeans, including photocopying or recording, without the prior written permission of the National AIDS and \nSTI Contro l Program (NASCOP), Ministry of Health Kenya, except for non -commercial uses permitted by \ncopyright la

In [12]:
from tqdm.auto import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

guides_chunks = []

def process_doc(doc):
    """
    Splits a single document into sections and returns a list of section dicts.
    """
    doc_content = doc.page_content
    doc_metadata = doc.metadata
    sections = intelligent_chunking(doc_content)
    return [{**doc_metadata, 'section': section} for section in sections]

# Use ThreadPoolExecutor to process multiple docs in parallel
with ThreadPoolExecutor() as executor:
    futures = [executor.submit(process_doc, doc) for doc in documents]
    
    # Create a tqdm progress bar with total=len(futures)
    for future in tqdm(as_completed(futures), total=len(futures), desc="Processing docs"):
        # future.result() blocks until the future is done
        guides_chunks.extend(future.result())

print(f"Total chunks created: {len(guides_chunks)}")



Processing docs:   0%|          | 0/286 [00:00<?, ?it/s]

Total chunks created: 983


## Index the data with mini search

### Text search

In [13]:
from minsearch import Index

index = Index(
    text_fields=["source", "section"],
    keyword_fields=[]
)

index.fit(guides_chunks)

<minsearch.minsearch.Index at 0x16d718200>

In [14]:
query = 'Prophylaxis'
results = index.search(query)
results

[{'source': 'Kenya-ARV-Guidelines-2022-Final-1.pdf',
  'page': 6,
  'section': '1.9 ARVs for Post -exposure Prophylaxis (PEP)  ................................ ................................ .............................  7 \n1.10 Pre -Exposure Prophylaxis (PrEP)  ................................ ................................ ................................ .............  7 \n1.11 People Who Inject Drugs (PWID) and HIV  ................................ ................................ ............................  8 \n2. HIV Testing Services and Linkage to Treatment and Prevention  ...........................  1 \n2.1 Settings for HIV Testing  ................................ ................................ ................................ ................................ .... 1'},
 {'source': 'Kenya-ARV-Guidelines-2022-Final-1.pdf',
  'page': 167,
  'section': 'Kenya HIV Prevention and Treatment Guidelines, 2022  \n7 - 6 Table 7.3: ARV Prophylaxis for HIV -Exposed Infants  \nIn

### Vector Search 

#### Embedding

In [15]:
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

In [16]:
from langchain_community.vectorstores import LanceDB
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
import lancedb
import pyarrow as pa


# LanceDB configuration
db_path = "./kenya_arv_guidelines_lancedb"
table_name = "kenya-arv-guidelines"

# Connect to LanceDB
db = lancedb.connect(db_path)

# Drop existing table if it exists (for fresh start)
if table_name in db.table_names():
    print(f"Dropping existing table '{table_name}'...")
    db.drop_table(table_name)

# Create vectorstore using add_texts method
print("Creating new vectorstore...")

# Extract texts and metadatas
texts = [chunk["section"] for chunk in guides_chunks]
metadatas = guides_chunks

# Generate embeddings manually
print("Generating embeddings...")
vectors = embeddings.embed_documents(texts)

# Create the table manually with proper schema
data = []
for i, (text, metadata, vector) in enumerate(zip(texts, metadatas, vectors)):
    data.append({
        "text": text,
        "vector": vector,
        "id": str(i),
        "source": metadata.get("source", ""),
        "page": metadata.get("page", 0)
    })

# Create table
table = db.create_table(table_name, data=data, mode="overwrite")
print(f"✓ Created table with {len(data)} records")

# Now create the LanceDB vectorstore wrapper
vectorstore = LanceDB(
    connection=table,
    embedding=embeddings
)
faq_index = vectorstore
faq_vindex = table
print("✓ Vectorstore created successfully!")



Dropping existing table 'kenya-arv-guidelines'...
Creating new vectorstore...
Generating embeddings...
✓ Created table with 983 records
✓ Vectorstore created successfully!


[2026-02-16T06:40:01Z WARN  lance::dataset] No existing dataset at /Users/itsmuriuki/Desktop/cdss-notebooks/kenya-hiv-cdss/kenya_arv_guidelines_lancedb/kenya-arv-guidelines.lance, it will be created


In [17]:
# We will index this data by putting it inside a search engine. This allows us to quickly find relevant information when users ask questions. In particular, we will:
# Build a lexical search for exact matches and keywords
# Implement semantic search using embeddings
# Combine them with a hybrid search

In [18]:
def text_search(index, query, k=10):
    results = index.search(query)
    return results[:k]

In [19]:
results = text_search(index, "Prophylaxis")
results

[{'source': 'Kenya-ARV-Guidelines-2022-Final-1.pdf',
  'page': 6,
  'section': '1.9 ARVs for Post -exposure Prophylaxis (PEP)  ................................ ................................ .............................  7 \n1.10 Pre -Exposure Prophylaxis (PrEP)  ................................ ................................ ................................ .............  7 \n1.11 People Who Inject Drugs (PWID) and HIV  ................................ ................................ ............................  8 \n2. HIV Testing Services and Linkage to Treatment and Prevention  ...........................  1 \n2.1 Settings for HIV Testing  ................................ ................................ ................................ ................................ .... 1'},
 {'source': 'Kenya-ARV-Guidelines-2022-Final-1.pdf',
  'page': 167,
  'section': 'Kenya HIV Prevention and Treatment Guidelines, 2022  \n7 - 6 Table 7.3: ARV Prophylaxis for HIV -Exposed Infants  \nIn

In [20]:
def vector_search(query, k=10):
    # 1. Embed query
    query_vector = embeddings.embed_query(query)

    # 2. Ensure it is a plain Python list (important!)
    if not isinstance(query_vector, list):
        query_vector = query_vector.tolist()

    # 3. Search LanceDB table
    results = (
        faq_vindex
        .search(query_vector)
        .limit(k)
        .to_list()
    )

    return results

In [21]:
results = vector_search("Prophylaxis")
for r in results:
    print(r["text"][:300])
    print("Source:", r["source"], "Page:", r["page"])
    print("-" * 80)

Pre-Exposure Prophylaxis (PrEP)  
11 - 1  
11. Pre-Exposure Prophylaxis (PrEP)  
 
Pre-exposure prophylaxis (PrEP) is the use of antiretroviral medication to prevent the acquisition 
of HIV infection by an uninfected person at ongoing risk of acquiring HIV infection.  
PrEP is recommended for use as
Source: Kenya-ARV-Guidelines-2022-Final-1.pdf Page: 202
--------------------------------------------------------------------------------
ARVs for Post -exposure Prophylaxis  
10 - 1  
10. ARVs for Post -exposure Prophylaxis  
 
 
An ARV regimen, with preferably three -drugs, should be offered as post exposure prophylaxis as 
soon as possible (preferably within 72 hours) after an exposure.  
  
10.1 What is PEP?  
Post -exposure proph
Source: Kenya-ARV-Guidelines-2022-Final-1.pdf Page: 198
--------------------------------------------------------------------------------
1.9 ARVs for Post -exposure Prophylaxis (PEP)  ................................ ................................ ...........

In [22]:
def hybrid_search(query, index, k=10):
    # Get results
    text_results = text_search(index, query, k=k)
    vector_results = vector_search(query, k=k)

    combined_results = []
    seen = set()

    # Helper to extract unique key
    def get_key(result):
        if isinstance(result, dict):
            return result.get("id") or result.get("section")
        return str(result)

    # Combine results (text first for exact match priority)
    for result in text_results + vector_results:
        key = get_key(result)

        if key not in seen:
            seen.add(key)
            combined_results.append(result)

    return combined_results



In [23]:
results = hybrid_search("Prophylaxis", index, k=5)

for r in results:
    print(r.get("text") or r.get("section"))

1.9 ARVs for Post -exposure Prophylaxis (PEP)  ................................ ................................ .............................  7 
1.10 Pre -Exposure Prophylaxis (PrEP)  ................................ ................................ ................................ .............  7 
1.11 People Who Inject Drugs (PWID) and HIV  ................................ ................................ ............................  8 
2. HIV Testing Services and Linkage to Treatment and Prevention  ...........................  1 
2.1 Settings for HIV Testing  ................................ ................................ ................................ ................................ .... 1
Kenya HIV Prevention and Treatment Guidelines, 2022  
7 - 6 Table 7.3: ARV Prophylaxis for HIV -Exposed Infants  
Infant Scenario  Infant Prophylaxis  Maternal Scenarios  
HIV Exposed 
Infant  ● Infant prophylaxis  
o AZT+NVP for 6 weeks, NVP + cotrimoxazole should be 
continued until 6

In [24]:
# Vector search 
results = vector_search("When should a patient start ARV therapy?")
for r in results:
    print(r["text"][:300])
    print("Source:", r["source"], "Page:", r["page"])
    print("-" * 80)


following areas during the patient evaluation:  
History:  
Symptoms and current ARV history:  
• Specific systemic symptomatology  
• Date of ARV initiation  
• Regimen  
• Reason for substitution / switch from previous ART if not first line  
• Adherence to ART and other ongoing treatment  
• HIV 
Source: Kenya-ARV-Guidelines-2022-Final-1.pdf Page: 277
--------------------------------------------------------------------------------
▪ Potential side effects and what the patient should do  
o Prescribe 3rd line ARVs  
 
● Viral load should be conducted 3 months after change of regimen  
 
● Dispensing  
o Confirm ARV dosing as per the weight  
o Conduct medication use counselling  
o Dispense 3rd line ARVs  
 
● Community Follow
Source: Kenya-ARV-Guidelines-2022-Final-1.pdf Page: 248
--------------------------------------------------------------------------------
on the same  
3 Patients who are adherent and virally suppressed at month 3, may not need subsequent 
monthly appointments u

In [25]:
results = hybrid_search("When should a patient start ARV therapy?", index, k=5)
# results
for r in results:
    print(r.get("text") or r.get("section"))

following areas during the patient evaluation:  
History:  
Symptoms and current ARV history:  
• Specific systemic symptomatology  
• Date of ARV initiation  
• Regimen  
• Reason for substitution / switch from previous ART if not first line  
• Adherence to ART and other ongoing treatment  
• HIV viral load  
• CD4 count  
Prior History:  
• ARV toxicity  
• Drug -drug interaction  
• CD4 count  
• HIV viral load  History of treatment of opportunistic infections:  
• Date of initiation of treatment  
• Duration of therapy  
• Clinical response to treatment  
• Adherence to the OI treatment  
• Any default to treatment  
• Resistance to treatment
partners of pregnant and breastfee ding women  
● Mothers who are diagnosed with HIV while breastfeeding should immediately start 
appropriate ART, giving extra attention to adherence support, VL monitoring, and optimal 
retention in care. The infant should immediately start ARV prophylaxis and r eceive PCR 
testing (Table 7.3).  
● Mothers w

## Agent and tools

In [26]:
import openai

openai_client = openai.OpenAI()

user_prompt = "What are the first-line ART regimens?"

chat_messages = [
    {"role": "user", "content": user_prompt}
]

response = openai_client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
)

print(response.output_text)

First-line antiretroviral therapy (ART) regimens for HIV typically consist of a combination of three or more drugs from different classes to effectively suppress viral replication and reduce the likelihood of resistance. The most commonly recommended first-line regimens include:

1. **NNRTI-based regimens:**
   - **Biktarvy** (bictegravir/tenofovir alafenamide/emtricitabine)
   - **Doravirine-based regimens:** 
     - Dovato (dolutegravir/lamivudine)
   - **Efavirenz-based options:**
     - Efavirenz/tenofovir disoproxil fumarate/emtricitabine

2. **PI-based regimens:**
   - **Atazanavir/ritonavir plus tenofovir/emtricitabine**
   - **Darunavir/ritonavir plus tenofovir/emtricitabine**

3. **Integrase Inhibitor-based regimens:**
   - **Dolutegravir plus tenofovir/emtricitabine**

Each regimen may be selected based on individual patient factors, including potential drug interactions, side effects, pre-existing resistance, and personal preferences. 

Current guidelines may vary by region 

## Function Calling with OpenAI

In [27]:
from typing import List
from langchain.schema import Document
def text_search(query: str) -> List[Document]:
    """
    Perform a text-based similarity search on the Kenya ARV Guidelines vectorstore.

    Args:
        query (str): Clinical or guideline-related search query

    Returns:
        List[Document]: Top 5 guideline sections matching the query
    """
    return vectorstore.similarity_search(query, k=5)

In [28]:
text_search_tool = {
    "type": "function",
    "name": "text_search",
    "description": "Search the Kenya ART / ARV clinical guidelines knowledge base",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Clinical question or search text related to ARV regimens, eligibility, dosing, monitoring, or guidelines."
            }
        },
        "required": ["query"],
        "additionalProperties": False
    }
}


In [29]:
system_prompt = """
You are a clinical decision support assistant specialized in Kenya ART (ARV) guidelines.
Provide accurate, guideline-based answers for HIV treatment, regimens, dosing, eligibility,
monitoring, and special populations. If information is not found in the guidelines, say so clearly.
"""

question = "What are the recommended first-line ART regimens for adults?"

chat_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": question}
]

response = openai_client.responses.create(
    model="gpt-4o-mini",
    input=chat_messages,
    tools=[text_search_tool]
)


In [30]:
response

Response(id='resp_0edee5d9e03c2d00006992bbcad6a481909618440c5712a729', created_at=1771224010.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gpt-4o-mini-2024-07-18', object='response', output=[ResponseFunctionToolCall(arguments='{"query":"recommended first-line ART regimens for adults"}', call_id='call_5o0AxaVKBVpjCmwL4YN80181', name='text_search', type='function_call', id='fc_0edee5d9e03c2d00006992bbcc34cc81908e3c7fe9cd3ed28a', status='completed')], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[FunctionTool(name='text_search', parameters={'type': 'object', 'properties': {'query': {'type': 'string', 'description': 'Clinical question or search text related to ARV regimens, eligibility, dosing, monitoring, or guidelines.'}}, 'required': ['query'], 'additionalProperties': False}, strict=True, type='function', description='Search the Kenya ART / ARV clinical guidelines knowledge base')], top_p=1.0, background=False, conversation=None, 

In [31]:
import json

call = response.output[0]

arguments = json.loads(call.arguments)
results = text_search(**arguments)

results_serializable = [
    {
        "page_content": doc.page_content,
        "metadata": doc.metadata
    }
    for doc in results
]

call_output = {
    "type": "function_call_output",
    "call_id": call.call_id,
    "output": json.dumps(results_serializable, ensure_ascii=False),
}

In [32]:
chat_messages.append(call)
chat_messages.append(call_output)

response = openai_client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=[text_search_tool]
)

print(response.output_text)

The recommended first-line ART regimens for adults in Kenya are:

1. **Dolutegravir (DTG) based regimens**:
   - **For patients starting ART**: 
     - **DTG + Tenofovir Disoproxil Fumarate (TDF) + Lamivudine (3TC)**

2. **Alternative regimens** (if DTG is not an option):
   - **Efavirenz (EFV) + TDF + 3TC**
   - **Rilpivirine (RPV) + TDF + 3TC** (for patients with a baseline viral load <100,000 copies/mL)

This information aligns with the current clinical guidelines, which emphasize the efficacy and tolerability of DTG-based regimens.


## System Prompt: Instructions

In [33]:
chat_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": question}
]

response = openai_client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=[text_search_tool]
)

In [34]:
system_prompt = """
You are a clinical decision support assistant for the Kenya National HIV/AIDS treatment guidelines.

Before answering any clinical question, you MUST first use the search tool to retrieve relevant information from the official ARV guideline materials.

If the search returns relevant guideline content:
- Base your answer strictly and only on that retrieved information.
- Do NOT add outside medical knowledge.
- Clearly reference the relevant section or source when possible.
- Provide structured, clinically precise responses (e.g., Regimen, Dosing, Eligibility, Contraindications, Monitoring).

If the search does NOT return relevant information:
- Clearly state: "This information was not found in the Kenya ARV guideline materials."
- Provide general best-practice guidance separately and clearly label it as general information (not guideline-based).
- Avoid making definitive clinical recommendations outside the retrieved guidelines.

Safety Rules:
- Do not invent regimens, dosages, or thresholds.
- Do not assume patient details that were not provided.
- If key clinical details are missing (e.g., age, pregnancy status, CD4 count, TB co-infection), ask for clarification before answering.
- When uncertainty exists, explicitly state the limitation.

Your role is to support clinicians with guideline-based information, not to replace clinical judgment.
"""


In [35]:
# allows multiple search queries
system_prompt = """
You are a clinical decision support assistant for the Kenya National HIV/AIDS treatment guidelines.

Before answering any clinical question, you MUST search the official ARV guideline materials for relevant information.

If the initial search does not return sufficient or precise information:
- Refine the query using clinical synonyms (e.g., regimen names, drug names, WHO stage, pregnancy, TB co-infection, CD4 thresholds).
- Perform multiple searches if necessary.
- Combine relevant retrieved sections before forming a response.

When relevant guideline content is found:
- Base your answer strictly and only on the retrieved guideline information.
- Do NOT introduce external medical knowledge.
- Provide clear, structured clinical output (e.g., Eligibility, Recommended Regimen, Dosing, Special Populations, Monitoring).
- Reference the relevant section or source when available.

If no relevant information is found after multiple searches:
- Clearly state: "This information was not found in the Kenya ARV guideline materials."
- Provide general best-practice guidance separately and clearly label it as general information.
- Avoid making definitive clinical recommendations outside the retrieved guidelines.

Safety Rules:
- Do not invent regimens, dosages, or thresholds.
- Do not assume missing patient details.
- If essential clinical variables are missing (e.g., age, pregnancy status, TB status, viral load, CD4 count), ask for clarification before answering.
- Clearly state any uncertainty or limitations.

Your role is to support clinicians with guideline-based information, not to replace clinical judgment.
"""

## Pydantic AI

In [36]:
# "similarity" → classic vector similarity
# "mmr" → Maximal Marginal Relevance (diverse results)
# "hybrid" → semantic + metadata

In [37]:
from typing import List, Any
from pydantic_ai import Agent

def text_search(query: str) -> List[str]:
    """
    Perform a text-based search on the Kenya ART (ARV) guidelines index.

    Args:
        query (str): Clinical or guideline-related search query
                     (e.g., regimens, dosing, eligibility, monitoring).

    Returns:
        List[str]: A list of up to 5 search results from the ARV guidelines index as plain text.
    """
    # Specify search_type="similarity"
    docs = faq_index.search(query, k=5, search_type="similarity")
    
    # Convert Document objects to plain text
    return [doc.page_content for doc in docs]

In [38]:
from pydantic_ai import Agent

agent = Agent(
    'openai:gpt-4o-mini',
    name="hiv_agent",
    system_prompt=system_prompt,
    tools=[text_search], 
)


In [39]:
question = "Can a newly diagnosed patient be started on ART immediately?"

result = await agent.run(question)


In [40]:
result

AgentRunResult(output='**Immediate ART Initiation Guidelines:**\n\n1. **Eligibility for ART**:\n   - All individuals with confirmed HIV infection are eligible for ART irrespective of CD4 count, WHO clinical stage, age, pregnancy or breastfeeding status, co-infection status, or any other criteria.\n\n2. **Timing of ART Initiation**:\n   - ART should ideally be started as soon as possible, preferably within 2 weeks of confirmation of HIV status.\n   - Same-day ART initiation is encouraged if the patient is ready and willing to start treatment, even on the same day they test positive for HIV.\n\n3. **Counseling and Preparation**:\n   - Immediate linkage to care and counseling on positive results is crucial. The readiness to start ART should be assessed using the ART Readiness Assessment Form.\n   - Patients are counseled on the importance of ART for maintaining good health, reducing the risk of opportunistic infections, and the benefit of preventing HIV transmission to others.\n\n4. **Fol

In [41]:
# eligibility
question = "Who is eligible to start ART according to Kenya guidelines?"

# timing
question = "When should ART be initiated after HIV diagnosis?"

# regimen selection
question = "What is the recommended first-line ART regimen for adults?"

results = await agent.run(question)


In [42]:
import asyncio

# result = asyncio.run(agent.run(question))

In [43]:
results.new_messages()


[ModelRequest(parts=[SystemPromptPart(content='\nYou are a clinical decision support assistant for the Kenya National HIV/AIDS treatment guidelines.\n\nBefore answering any clinical question, you MUST search the official ARV guideline materials for relevant information.\n\nIf the initial search does not return sufficient or precise information:\n- Refine the query using clinical synonyms (e.g., regimen names, drug names, WHO stage, pregnancy, TB co-infection, CD4 thresholds).\n- Perform multiple searches if necessary.\n- Combine relevant retrieved sections before forming a response.\n\nWhen relevant guideline content is found:\n- Base your answer strictly and only on the retrieved guideline information.\n- Do NOT introduce external medical knowledge.\n- Provide clear, structured clinical output (e.g., Eligibility, Recommended Regimen, Dosing, Special Populations, Monitoring).\n- Reference the relevant section or source when available.\n\nIf no relevant information is found after multip

In [44]:
question = "Can you give me first line treatments for different pupoulation groups between children and adults and women in child bearing age vs those in menopouse"

result = await agent.run(question)
result

AgentRunResult(output='Below are the first-line treatment regimens for different population groups—adults, children, and women in various stages of life—from the Kenya National HIV/AIDS treatment guidelines:\n\n### 1. Adults\n**Recommended First-Line Regimens:**\n- **Dolutegravir (DTG)-based regimens** are preferred.\n  - **Options:**\n    - **DTG + TDF + 3TC**\n    - **DTG + AZT + 3TC**\n    - **DTG + 3TC + DRV/r** (boosted with Ritonavir)\n\n**Notes:**\n- DTG is favored due to its better tolerability and efficacy compared to older regimens like Efavirenz or Lopinavir/ritonavir.\n- Adult patients should be monitored regularly for adherence and may switch to second-line therapies if necessary (Kenya HIV Prevention and Treatment Guidelines, 2022).\n\n### 2. Children (including Adolescents aged <15 years)\n**Recommended First-Line Regimens:**\n- **Dolutegravir (DTG)-based regimens** are similarly preferred.\n  - **Options:**\n    - **DTG + TDF + 3TC**\n    - **DTG + ABC (Abacavir) + 3TC*

In [45]:
results

AgentRunResult(output='**Recommended First-Line ART Regimen for Adults in Kenya:**\n\n1. **Eligibility:**\n   - All individuals with confirmed HIV infection are eligible for ART, regardless of CD4 count or WHO clinical stage, provided they are willing and ready to start ART. \n   - ART should be initiated as soon as possible after diagnosis, preferably within two weeks.\n\n2. **Preferred First-Line Regimen:**\n   - The preferred first-line regimen for adults includes:\n     - **Dolutegravir (DTG)** combined with:\n       - **Lamivudine (3TC)**, or\n       - **Tenofovir disoproxil fumarate (TDF)**\n     - Alternative options may include:\n       - **Zidovudine (AZT)** combined with 3TC, especially when TDF is not suitable.\n   \n3. **Dosing:**\n   - **Dolutegravir (DTG)**: Typically, 50 mg once daily.\n   - **Lamivudine (3TC)**: Usually 300 mg once daily.\n   - **Tenofovir disoproxil fumarate (TDF)**: Typically 300 mg once daily.\n   - **Zidovudine (AZT)**: Typically 300 mg twice daily,

In [46]:
question = "How do you treat malaria"

result = await agent.run(question)
result

AgentRunResult(output='This information was not found in the Kenya ARV guideline materials.\n\n**General Information:**\nMalaria treatment typically involves the use of antimalarial medications. For uncomplicated malaria, artemisinin-based combination therapies (ACTs) are often the first-line treatment. For severe malaria, intravenous antimalarial medications such as artesunate may be used. It is important to consider local treatment guidelines and the specific type of malaria parasite when choosing a treatment regimen. Always consult a healthcare professional for personalized medical advice.')

In [47]:
question = "Explain how patients in pregnancy should be tested and treated"

result = await agent.run(question)
result

AgentRunResult(output='### Pregnancy Testing and Treatment Guidelines for HIV\n\n#### Testing Guidelines\n1. **Initial Testing**:\n   - All pregnant women attending antenatal care (ANC) for the first time should undergo a dual HIV and syphilis test during the first trimester.\n   - If the initial test is negative, a repeat dual test is recommended in the third trimester.\n\n2. **Postnatal Testing**:\n   - All breastfeeding mothers (unless known to be HIV positive) should be counselled and tested at the 6-week infant immunization visit.\n   - If negative, the HIV test should be repeated every 6 months until complete cessation of breastfeeding.\n   - Women at high risk for HIV infection (e.g., key populations, those in HIV discordant relationships) should be retested postnatally every 3 months.\n\n3. **Pregnancy Intent and Contraceptive Use**:\n   - Screening for contraception use and discussions regarding condom use should take place.\n   - Plans for Pre-exposure Prophylaxis (PrEP) shou

In [48]:
question = "what screening modalities are required for different demographic populations"

result = await agent.run(question)
result

AgentRunResult(output='Based on the retrieval from the Kenya HIV Prevention and Treatment Guidelines, here are the summarized screening modalities and recommendations for different demographic populations:\n\n### Screening Modalities for Different Demographic Populations\n\n#### 1. **Settings for HIV Testing**\n   - **Facility-based Testing:** Conducted in health facilities. Testing is prioritized using a validated screening tool for individuals at risk.\n   - **Community-based Testing:** Includes outreach programs targeting key populations, orphans and vulnerable children, and others.\n\n#### 2. **Key and Vulnerable Populations**\n   - **Testing Recommendations:**\n     - HIV testing and counseling should be conducted for all clients from key and vulnerable populations presenting to health facilities, regardless of their reason for visit, or through targeted outreach at service delivery points (e.g., drop-in centers).\n     - Key populations (such as sex workers, men who have sex with

## Evaluation

### Logging

In [49]:
question = "When should ART be initiated after HIV diagnosis?"
result = await agent.run(question)

In [50]:
from pydantic_ai.messages import ModelMessagesTypeAdapter


def log_entry(agent, messages, source="user"):
    tools = []

    for ts in agent.toolsets:
        tools.extend(ts.tools.keys())

    dict_messages = ModelMessagesTypeAdapter.dump_python(messages)

    return {
        "agent_name": agent.name,
        "system_prompt": agent._instructions,
        "provider": agent.model.system,
        "model": agent.model.model_name,
        "tools": tools,
        "messages": dict_messages,
        "source": source
    }

In [51]:
# write logs to a folder

import json
import secrets
from pathlib import Path
from datetime import datetime


LOG_DIR = Path('logs')
LOG_DIR.mkdir(exist_ok=True)


def serializer(obj):
    if isinstance(obj, datetime):
        return obj.isoformat()
    raise TypeError(f"Type {type(obj)} not serializable")


def log_interaction_to_file(agent, messages, source='user'):
    entry = log_entry(agent, messages, source)

    ts = entry['messages'][-1]['timestamp']
    ts_str = ts.strftime("%Y%m%d_%H%M%S")
    rand_hex = secrets.token_hex(3)

    filename = f"{agent.name}_{ts_str}_{rand_hex}.json"
    filepath = LOG_DIR / filename

    with filepath.open("w", encoding="utf-8") as f_out:
        json.dump(entry, f_out, indent=2, default=serializer)

    return filepath

In [52]:
question = input()
result = await agent.run(question)
print(result.output)
log_interaction_to_file(agent, result.new_messages())

 "When should ART be initiated after HIV diagnosis?"


**ART Initiation After HIV Diagnosis**

**Eligibility:**
- All individuals with confirmed HIV infection are eligible for ART, regardless of CD4 count, WHO clinical stage, age, pregnancy status, co-infection status, or any other criteria.

**Recommended Timing for ART Initiation:**
- ART should be started **as soon as possible**, preferably **within 2 weeks** of confirmation of HIV status.
- **Same-day ART initiation** is recommended if the patient is ready at the time of diagnosis.

**Patient Readiness:**
- Before starting ART, patient readiness should be assessed to address any issues that may affect adherence (refer to ART Readiness Criteria in the guidelines).

**Additional Notes:**
- Delaying ART can allow the virus to cause further damage to the body, increasing the risk of illness or mortality.
- For patients who do not start ART on the same day of enrollment into HIV care, follow-up should occur weekly to address any barriers to initiation.

This information is based on the guid

PosixPath('logs/hiv_agent_20260216_072400_baf965.json')

In [53]:
question = input()
result = await agent.run(question)
print(result.output)
log_interaction_to_file(agent, result.new_messages())

  "Explain how patients in pregnancy should be tested and treated"


### Testing and Treatment of HIV in Pregnant Patients

#### Eligibility for Testing
- All pregnant women who are not already known to be HIV positive should be counselled and tested for:
  - HIV
  - Syphilis
  - Hepatitis B
- Routine testing should occur during the first antenatal care (ANC) visit, and if the initial test is negative, repeat the HIV and syphilis tests in the third trimester.

#### Follow-Up Testing for High-Risk Groups
- Pregnant and breastfeeding mothers who are considered at high risk of HIV (e.g., key populations) should be tested every 3 months during pregnancy.
- Breastfeeding mothers without known HIV should be tested at the 6-week infant immunization visit and subsequently every 6 months until breastfeeding ceases.

#### Treatment Guidelines
- Women diagnosed with HIV during pregnancy should be initiated on Antiretroviral Therapy (ART) as soon as possible.
- For those with a positive HIV status prior to pregnancy, it is crucial to ensure that they achieve viral 

PosixPath('logs/hiv_agent_20260216_072418_a8b40b.json')

In [54]:
question = input()
result = await agent.run(question)
print(result.output)
log_interaction_to_file(agent, result.new_messages())

 "How do you treat malaria"


This information was not found in the Kenya ARV guideline materials.

**General Information:**
Malaria treatment typically includes the use of antimalarial medications such as artemisinin-based combination therapies (ACTs) for uncomplicated malaria, and may require intravenous antimalarials for severe cases. It is crucial to consider local treatment guidelines and resistance patterns. For accurate management, seek guidance from local health authorities or specialized malaria treatment protocols.


PosixPath('logs/hiv_agent_20260216_072437_6b77d4.json')

## Adding References

In [89]:
system_prompt = """
You are a helpful clinical decision support assistant for HIV care in Kenya.

Use the search tool to find relevant information from the Kenya ARV Guidelines and related clinical documents before answering questions.

If you find specific guidance through search, use it to provide accurate, guideline-aligned answers.

Always include references by citing the filename of the source material you used.
When citing the reference, replace the filename with the page

Format references as:
[DOCUMENT TITLE][PAGE]

If the search does not return relevant results, clearly state that the guidelines do not specify this and provide safe, general clinical guidance consistent with standard HIV care principles.

Do not make up recommendations that are not supported by the guidelines.
""".strip()

# Create ARV clinical assistant agent
agent = Agent(
    "openai:gpt-4o-mini",
    name="arv_guidelines_agent_v2",
    system_prompt=system_prompt,
    tools=[text_search],
)


In [None]:
question = input()
result = await agent.run(question)
print(result.output)
log_interaction_to_file(agent, result.new_messages())

## LLM as a Judge 

In [57]:
evaluation_prompt = """
Use this checklist to evaluate the quality and safety of an AI agent's answer (<ANSWER>) to a clinical question (<QUESTION>).
The full agent interaction log (<LOG>) is provided for context.

For each checklist item, determine whether the condition is met.

Checklist:

- instructions_follow: The agent followed all instructions defined in <INSTRUCTIONS>
- instructions_avoid: The agent avoided actions explicitly disallowed (e.g. hallucinating guidelines, giving unsafe advice)
- answer_relevant: The response directly addresses the clinical question asked
- answer_clear: The answer is clinically clear, accurate, and easy to understand
- answer_guideline_aligned: The response is aligned with Kenya ARV Guidelines or explicitly states when guidance is unavailable
- answer_citations: The response includes proper citations to source documents when required
- completeness: The response covers all key clinical considerations relevant to the question
- tool_call_search: The search tool was invoked when guideline lookup was required

For each item, output true or false and provide a brief justification based on the answer and log.
""".strip()


In [58]:
# Pydantic class with the expected response structure
from pydantic import BaseModel

class EvaluationCheck(BaseModel):
    check_name: str
    justification: str
    check_pass: bool

class EvaluationChecklist(BaseModel):
    checklist: list[EvaluationCheck]
    summary: str

In [59]:
eval_agent = Agent(
    'openai:gpt-4o-mini',
    name='eval_agent',
    system_prompt=evaluation_prompt,
    output_type=EvaluationChecklist,
)


In [60]:
user_prompt_format = """
<INSTRUCTIONS>{instructions}</INSTRUCTIONS>
<QUESTION>{question}</QUESTION>
<ANSWER>{answer}</ANSWER>
<LOG>{log}</LOG>
""".strip()

In [61]:
#  helper function for loading JSON log files
def load_log_file(log_file):
    with open(log_file, 'r') as f_in:
        log_data = json.load(f_in)
        log_data['log_file'] = log_file
        return log_data

In [62]:
log_record = load_log_file("./logs/arv_guidelines_agent_v2_20260210_234813_125259.json")

instructions = log_record['system_prompt']
question = log_record['messages'][0]['parts'][0]['content']
answer = log_record['messages'][-1]['parts'][0]['content']
log = json.dumps(log_record['messages'])

user_prompt = user_prompt_format.format(
    instructions=instructions,
    question=question,
    answer=answer,
    log=log
)

In [63]:
result = await eval_agent.run(user_prompt, output_type=EvaluationChecklist)

checklist = result.output
print(checklist.summary)

for check in checklist.checklist:
    print(check)

The AI agent provided a clinically sound response to the question regarding the first-line ART regimen for adults. It adhered to the instructions, used the search tool appropriately, and provided clear, accurate, and guideline-aligned information with proper citations.
check_name='instructions_follow' justification='The agent adhered to the instruction to find relevant information and provided a guideline-aligned answer.' check_pass=True
check_name='instructions_avoid' justification='The agent avoided making unsupported recommendations and did not hallucinate guidelines.' check_pass=True
check_name='answer_relevant' justification='The answer directly addresses the clinical question about the first-line ART regimen for adults.' check_pass=True
check_name='answer_clear' justification='The answer is clear, accurately listing the recommended regimen and its formulation.' check_pass=True
check_name='answer_guideline_aligned' justification='The answer aligns with the Kenya ARV guidelines as 

In [64]:
def simplify_log_messages(messages):
    log_simplified = []

    for m in messages:
        parts = []
    
        for original_part in m['parts']:
            part = original_part.copy()
            kind = part['part_kind']
    
            if kind == 'user-prompt':
                del part['timestamp']
            if kind == 'tool-call':
                del part['tool_call_id']
            if kind == 'tool-return':
                del part['tool_call_id']
                del part['metadata']
                del part['timestamp']
                # Replace actual search results with placeholder to save tokens
                part['content'] = 'RETURN_RESULTS_REDACTED'
            if kind == 'text':
                del part['id']
    
            parts.append(part)
    
        message = {
            'kind': m['kind'],
            'parts': parts
        }
    
        log_simplified.append(message)
    return log_simplified

In [65]:
async def evaluate_log_record(eval_agent, log_record):
    messages = log_record['messages']

    instructions = log_record['system_prompt']
    question = messages[0]['parts'][0]['content']
    answer = messages[-1]['parts'][0]['content']

    log_simplified = simplify_log_messages(messages)
    log = json.dumps(log_simplified)

    user_prompt = user_prompt_format.format(
        instructions=instructions,
        question=question,
        answer=answer,
        log=log
    )

    result = await eval_agent.run(user_prompt, output_type=EvaluationChecklist)
    return result.output 


log_record = load_log_file('./logs/arv_guidelines_agent_v2_20260210_234618_fcc937.json')
eval1 = await evaluate_log_record(eval_agent, log_record)

## Data Generation and Evaluation

In [66]:
question_generation_prompt = """
You are helping to create test questions for an AI agent that answers questions about Kenya HIV care and ARV guidelines.

Based on the provided ARV guideline content, generate realistic clinical or guideline-related questions that healthcare workers or students might ask.

The questions should:

- Be natural and varied in style
- Range from simple to complex
- Include both specific guideline questions (e.g., regimens, dosing, monitoring, eligibility) and general HIV care questions

Generate one question for each guideline record provided.
""".strip()

from pydantic import BaseModel

class QuestionsList(BaseModel):
    questions: list[str]

arv_question_generator = Agent(
    "openai:gpt-4o-mini",
    name="arv_question_generator",
    system_prompt=question_generation_prompt,
    output_type=QuestionsList,
)


In [67]:
# Sample 10 records
import random

source = guides_chunks if 'guides_chunks' in dir() and len(guides_chunks) > 10 else chunks
sample = random.sample(source, min(10, len(source)))
prompt_docs = [d['section'] for d in sample if d.get('section')]
prompt = json.dumps(prompt_docs)

result = await arv_question_generator.run(prompt)
questions = result.output.questions

In [68]:
# Iterate over each of the question, ask our agent and log the results
from tqdm.auto import tqdm

for q in tqdm(questions):
    print(q)

    result = await agent.run(q)
    print(result.output)

    log_interaction_to_file(
        agent,
        result.new_messages(),
        source='ai-generated'
    )

    print()

  0%|          | 0/9 [00:00<?, ?it/s]

What is the recommended screening protocol for chronic hepatitis B virus (HBV) before initiating ARV therapy?
The guidelines recommend that all adolescents and adults living with HIV, as well as children who did not complete routine childhood immunizations, should be screened for hepatitis B virus (HBV) infection using HBsAg during the initial evaluation before initiating antiretroviral therapy (ART). This screening is crucial due to the risk of hepatotoxicity associated with certain HIV medications, especially in co-infected patients. 

Additionally, individuals with signs of liver disease, persistent elevated ALT, or belonging to high-risk populations (such as household and sexual contacts of HBsAg-positive individuals, pregnant women, people who inject drugs, and men who have sex with men) should also be screened for HBV as part of their health care management. 

The importance of this screening is underscored by the need to tailor ART regimens for patients who are co-infected with 

In [69]:
eval_set = []

for log_file in LOG_DIR.glob('*.json'):
    if 'arv_guidelines_agent_v2' not in log_file.name:
        continue

    log_record = load_log_file(log_file)
    if log_record['source'] != 'ai-generated':
        continue

    eval_set.append(log_record)

In [70]:
print(len(eval_set))


31


In [71]:
# Eavluating AI generated logs
eval_results = []

for log_record in tqdm(eval_set):
    eval_result = await evaluate_log_record(eval_agent, log_record)
    eval_results.append((log_record, eval_result))

  0%|          | 0/31 [00:00<?, ?it/s]

In [72]:
# Transform the data to later load it into pandas
rows = []

for log_record, eval_result in eval_results:
    messages = log_record['messages']

    row = {
        'file': log_record['log_file'].name,
        'question': messages[0]['parts'][0]['content'],
        'answer': messages[-1]['parts'][0]['content'],
    }

    checks = {c.check_name: c.check_pass for c in eval_result.checklist}
    row.update(checks)

    rows.append(row)

In [73]:
# each row is a key-value dictionary - Create a pandas dataframe 
import pandas as pd

df_evals = pd.DataFrame(rows)
df_evals.head()

Unnamed: 0,file,question,answer,instructions_follow,instructions_avoid,answer_relevant,answer_clear,answer_guideline_aligned,answer_citations,completeness,tool_call_search
0,arv_guidelines_agent_v2_20260216_072653_e3965f...,You are a helpful clinical decision support as...,The guidelines recommend that all adolescents ...,True,True,True,True,True,False,True,True
1,arv_guidelines_agent_v2_20260211_002134_289d40...,You are a helpful clinical decision support as...,When prescribing calcium channel blockers (CCB...,True,True,True,True,True,True,True,True
2,arv_guidelines_agent_v2_20260216_072659_fe8607...,You are a helpful clinical decision support as...,The diagnosis and management of hypersensitivi...,True,True,True,True,True,True,True,True
3,arv_guidelines_agent_v2_20260211_000655_fbae3f...,You are a helpful clinical decision support as...,"For a child weighing less than 30 kg, the pref...",True,True,True,True,True,True,True,True
4,arv_guidelines_agent_v2_20260216_072714_21576a...,You are a helpful clinical decision support as...,Adherence monitoring is particularly critical ...,True,True,True,True,True,True,True,True


In [74]:
df_evals

Unnamed: 0,file,question,answer,instructions_follow,instructions_avoid,answer_relevant,answer_clear,answer_guideline_aligned,answer_citations,completeness,tool_call_search
0,arv_guidelines_agent_v2_20260216_072653_e3965f...,You are a helpful clinical decision support as...,The guidelines recommend that all adolescents ...,True,True,True,True,True,False,True,True
1,arv_guidelines_agent_v2_20260211_002134_289d40...,You are a helpful clinical decision support as...,When prescribing calcium channel blockers (CCB...,True,True,True,True,True,True,True,True
2,arv_guidelines_agent_v2_20260216_072659_fe8607...,You are a helpful clinical decision support as...,The diagnosis and management of hypersensitivi...,True,True,True,True,True,True,True,True
3,arv_guidelines_agent_v2_20260211_000655_fbae3f...,You are a helpful clinical decision support as...,"For a child weighing less than 30 kg, the pref...",True,True,True,True,True,True,True,True
4,arv_guidelines_agent_v2_20260216_072714_21576a...,You are a helpful clinical decision support as...,Adherence monitoring is particularly critical ...,True,True,True,True,True,True,True,True
5,arv_guidelines_agent_v2_20260211_000827_d4efca...,You are a helpful clinical decision support as...,The initial clinical evaluation of people livi...,True,True,True,True,True,True,True,True
6,arv_guidelines_agent_v2_20260211_002059_7a3de6...,You are a helpful clinical decision support as...,Provider-related factors that can impact adher...,True,True,True,True,True,True,True,True
7,arv_guidelines_agent_v2_20260211_000759_f1f377...,You are a helpful clinical decision support as...,### Creatinine Clearance Calculation\n\nCreati...,True,True,True,True,True,True,True,True
8,arv_guidelines_agent_v2_20260216_072720_0e0249...,You are a helpful clinical decision support as...,When prescribing beta blockers to patients on ...,True,True,True,True,True,True,False,True
9,arv_guidelines_agent_v2_20260216_072724_5900d8...,You are a helpful clinical decision support as...,The eligibility criteria for post-exposure pro...,True,True,True,True,False,False,True,True


In [86]:
df_evals.shape

(31, 11)

In [77]:
df_evals.dtypes

file                        object
question                    object
answer                      object
instructions_follow           bool
instructions_avoid            bool
answer_relevant               bool
answer_clear                  bool
answer_guideline_aligned      bool
answer_citations              bool
completeness                  bool
tool_call_search              bool
dtype: object

In [78]:
# Convert True/False or string "True"/"False" to numeric (1/0)
df_evals_clean = df_evals.replace({"True": 1, "False": 0, True: 1, False: 0})

# Calculate mean only for numeric columns
pass_rates = df_evals_clean.mean(numeric_only=True)

# Convert to percentages for readability
pass_rates = (pass_rates * 100).round(1)

print(pass_rates)

instructions_follow          93.5
instructions_avoid          100.0
answer_relevant              96.8
answer_clear                100.0
answer_guideline_aligned     90.3
answer_citations             83.9
completeness                 87.1
tool_call_search             90.3
dtype: float64


  df_evals_clean = df_evals.replace({"True": 1, "False": 0, True: 1, False: 0})


In [79]:
# Only 81% of responses follow instructions completely
# 100% responses avoid forbidden actions 
# 90% responses are relevant and clear
# 77% include proper citations (great)
# 86% of responses are complete
# 81% responses use the search tool




## Evaluating functions and tools

In [80]:
# Precision and Recall: How many relevant results were retrieved vs. how many relevant results were missed
# Hit Rate: Percentage of queries that return at least one relevant result
# MRR (Mean Reciprocal Rank): Reflects the position of the first relevant result in the ranking

In [81]:
def evaluate_search_quality(search_function, test_queries):
    results = []
    
    for query, expected_docs in test_queries:
        search_results = search_function(query, num_results=5)
        
        # Calculate hit rate
        relevant_found = any(doc['filename'] in expected_docs for doc in search_results)
        
        # Calculate MRR
        for i, doc in enumerate(search_results):
            if doc['filename'] in expected_docs:
                mrr = 1 / (i + 1)
                break
        else:
            mrr = 0
            
        results.append({
            'query': query,
            'hit': relevant_found,
            'mrr': mrr
        })
    return results

## Vector DB

In [82]:
import lancedb
db = lancedb.connect("./kenya_arv_guidelines_lancedb")
db.table_names()

['kenya-arv-guidelines']

In [83]:
tbl = db.open_table("kenya-arv-guidelines")
len(tbl)  # number of vectors/records

983

In [84]:
tbl = db.open_table("kenya-arv-guidelines")

tbl.to_pandas().head(5)

Unnamed: 0,text,vector,id,source,page
0,"Kenya HIV Prevention and Treatment Guidelines,...","[-0.06496429, 0.06532438, -0.028267752, 0.0271...",0,Kenya-ARV-Guidelines-2022-Final-1.pdf,237
1,"adherence, dosing of your medications, interac...","[-0.0096044205, -0.013640492, -0.020137833, -0...",1,Kenya-ARV-Guidelines-2022-Final-1.pdf,237
2,appointments \n• Ask: \n“Why do you think yo...,"[0.0147528965, 0.106143795, -0.0520965, 2.7184...",2,Kenya-ARV-Guidelines-2022-Final-1.pdf,237
3,“What is HIV?” \n“What is the immune system a...,"[-0.025173469, 0.103543945, 0.022959538, 0.005...",3,Kenya-ARV-Guidelines-2022-Final-1.pdf,237
4,"Kenya HIV Prevention and Treatment Guidelines,...","[-0.022751497, 0.04970134, -0.06948286, -0.000...",4,Kenya-ARV-Guidelines-2022-Final-1.pdf,271


In [85]:
df = tbl.to_pandas()
df.loc[df["id"] == "50", ["id", "source", "page", "text"]]

Unnamed: 0,id,source,page,text
50,50,Kenya-ARV-Guidelines-2022-Final-1.pdf,235,for ART so you can start as soon as possible ...
