In [None]:
import json
filename = "../data/evaluation/evaluation_vector_store_results_2025-04-26_12-03-28.jsonl"  # k=30
# filename = "../data/evaluation/evaluation_vector_store_results_2025-04-26_12-11-06.jsonl"  # k=50

# The file is JSON Lines: one JSON object per line.
# - Decode explicitly as UTF-8 instead of relying on the platform default.
# - Stream the file rather than materialising it with readlines().
# - Skip blank lines (e.g. a trailing newline), which json.loads would reject.
with open(filename, "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

In [None]:
def _match_rate(records):
    """Return the mean of the "match" values in ``records`` (NaN when there are none)."""
    if not records:
        return float("nan")
    return sum(record["match"] for record in records) / len(records)


# Overall score. This name is no longer reused inside the loop below, so it
# still holds the overall value once the cell has finished.
retriever_performance = _match_rate(data)
print(f"Retriever performance: {retriever_performance}")

query_types = set(x["query_type"] for x in data)
# A set of strings iterates in an arbitrary order that can change between
# kernel restarts; sort so the report is reproducible.
for query_type in sorted(query_types, key=str):
    query_type_data = [x for x in data if x["query_type"] == query_type]
    query_type_performance = _match_rate(query_type_data)
    print(f"Retriever performance for {query_type}: {query_type_performance}")

In [None]:
from dotenv import load_dotenv
from openai import AzureOpenAI
from pathlib import Path
import os

# The .env file is looked up one directory above the working directory.
dotenv_path = Path("..") / ".env"
load_dotenv(dotenv_path)

def get_env_var(key: str) -> str:
    """Return the value of the environment variable ``key``.

    Args:
        key: Name of the environment variable to read.

    Returns:
        The value stored in ``os.environ`` under ``key``.

    Raises:
        KeyError: If ``key`` is not set. The message names the missing variable.
    """
    try:
        return os.environ[key]
    except KeyError:
        # ``from None`` suppresses the original bare lookup error, which adds
        # nothing beyond the key name, so the traceback shows only the
        # actionable message.
        raise KeyError(f"Please set the {key} environment variable.") from None

In [None]:
def make_prompt(symptoms_description, document_titles, document_text, k):
    """Assemble the re-ranking prompt sent to the language model.

    The prompt shows the patient's symptom description, the shortlisted
    conditions and the condition descriptions, then asks for the ``k`` most
    likely condition titles as a comma-separated list.

    Every argument is interpolated with its default string formatting. The
    text starts with a newline, each line carries a four-space indent, and the
    prompt ends with an indent-only line.
    """
    indent = "    "
    body = [
        "You are part of a retrieval system for a medical domain.",
        "Given a description of symptoms provided by a patient, an initial retriever has shortlisted several possible conditions, along with the distance score of the most relevant snippet for that condition (so lower is better), the number of snippets retrieved and the entire content of the associated document.",
        "",
        "Here is the patient's symptom description:",
        f"{symptoms_description}",
        "",
        "The shortlisted conditions and their retrieval scores and number of snippets are:",
        f"{document_titles}",
        "",
        "The corresponding condition descriptions are:",
        f"{document_text}",
        "",
        # The trailing space after "symptoms." is part of the prompt text.
        f"Your task is to select the {k} most likely conditions based on the symptoms. ",
        "Please return only the titles of the selected conditions, comma-separated.",
        "",
    ]
    return "\n" + "\n".join(indent + text for text in body)


In [None]:
import ollama


def get_response_ollama(prompt, model="gemma3:1b"):
    """Send ``prompt`` to ``model`` through ``ollama.generate`` and return the generated text."""
    return ollama.generate(prompt=prompt, model=model)["response"]

In [None]:
# --- Azure OpenAI configuration ---
AZURE_OPENAI_API_VERSION = "2024-12-01-preview"
MAX_TOKENS = 2048  # passed as max_completion_tokens by get_response_gpt4

# Both values must be present in the environment; get_env_var raises KeyError otherwise.
endpoint = get_env_var("AZURE_OPENAI_ENDPOINT_gpt-4o")
key = get_env_var("AZURE_OPENAI_API_KEY")

gpt_4o_client = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=key,
    api_version=AZURE_OPENAI_API_VERSION,
)

def get_response_gpt4(prompt: str, model: str = "gpt-4o") -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the user message.
        model: Model name forwarded to the chat-completions call. Defaults to
            "gpt-4o", the value used by the callers in this notebook.

    Returns:
        The content of the first choice's message, or an empty string when the
        message carries no content, so the declared ``str`` return type holds.
    """
    response = gpt_4o_client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant.",
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
        max_completion_tokens=MAX_TOKENS,
        model=model,
    )
    content = response.choices[0].message.content
    # ``content`` is Optional in the SDK; normalise None so that string
    # methods such as ``.split(",")`` keep working in callers.
    return content if content is not None else ""

# Smoke test: push a one-word prompt through get_response_gpt4 and display the reply.
# NOTE: every run of this cell issues a real request through gpt_4o_client.
get_response_gpt4("test", "gpt-4o")

In [None]:
from openai import OpenAI

# OpenAI-compatible client pointed at the server listening on localhost:8000.
qwen_client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="EMPTY",  # placeholder value: vLLM doesn't require an API key
)

def get_response_qwen(prompt: str, model: str, max_tokens: int = 20) -> str:
    """Request a plain text completion for ``prompt`` and return it stripped.

    Args:
        prompt: Raw prompt text sent to the completions endpoint.
        model: Model name forwarded to the completions call.
        max_tokens: Upper bound on generated tokens. Defaults to 20, the value
            previously hard-coded here. Raise it when the expected answer
            (e.g. a list of several titles) may not fit in that budget.

    Returns:
        The text of the first choice with surrounding whitespace removed.
    """
    response = qwen_client.completions.create(
        model=model,
        prompt=prompt,
        max_tokens=max_tokens,
    )
    return response.choices[0].text.strip()

In [None]:
# Build a lookup from condition title to the condition's full text.
# The file is JSON Lines: decode explicitly as UTF-8, stream it line by line,
# and skip blank lines (json.loads would reject them).
conditions = {}
with open("../data/nhs-conditions/v3/conditions.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue
        condition = json.loads(line)
        conditions[condition["condition_title"]] = condition["condition_content"]

In [None]:
def _summarise_sources(sources, scores):
    """Collapse per-snippet hits into one entry per source, lowest score first."""
    best_sources = {}
    for source, score in zip(sources, scores):
        entry = best_sources.get(source)
        if entry is None:
            # First snippet seen for this source.
            best_sources[source] = {'lowest_score': score, 'number_of_snippets': 1}
        else:
            entry['lowest_score'] = min(entry['lowest_score'], score)
            entry['number_of_snippets'] += 1
    # sorted() is stable, so sources with equal scores keep first-seen order.
    return sorted(best_sources.items(), key=lambda item: item[1]['lowest_score'])


def re_rank_documents(line, k, get_response=None, model="gpt-4o"):
    """Ask a language model to pick the ``k`` most likely conditions for one record.

    Args:
        line: Record with the keys "symptoms_description",
            "retrieved_documents_sources" and "retrieved_documents_scores".
            The last two are paired element-wise.
        k: Number of conditions the model is asked to return.
        get_response: Callable ``(prompt, model) -> str`` used to query the
            model. Defaults to ``get_response_gpt4`` (resolved at call time),
            which preserves the previous behaviour.
        model: Model name passed to ``get_response``. Defaults to "gpt-4o".

    Returns:
        The raw text returned by ``get_response``.

    Raises:
        KeyError: If ``line`` lacks a required key or a retrieved source has
            no entry in ``conditions``.
    """
    if get_response is None:
        get_response = get_response_gpt4

    symptoms_description = line['symptoms_description']

    # One (title, {'lowest_score', 'number_of_snippets'}) pair per distinct
    # source. This list is interpolated into the prompt as-is.
    document_titles_unique = _summarise_sources(
        line["retrieved_documents_sources"],
        line["retrieved_documents_scores"],
    )

    # Full condition texts, in the same order as the titles above.
    document_text = "\n\n".join(conditions[title] for title, _ in document_titles_unique)

    prompt = make_prompt(symptoms_description, document_titles_unique, document_text, k=k)
    return get_response(prompt, model)


In [None]:
import tqdm

# Maximum number of evaluation records scored per value of k.
cutoff = 1000

for k in [5, 3]:
    ct = 0  # examples whose gold title appears in the model's answer
    tot = 0  # examples scored successfully
    failures = 0  # examples skipped because re-ranking or parsing raised
    last_error = None
    for line in tqdm.tqdm(data[:cutoff]):
        try:
            reranked_documents = re_rank_documents(line, k).split(",")
            # Normalise each returned title: strip quotes, hyphenate spaces and
            # lower-case. Presumably this matches the slug format of
            # 'conditions_title' -- TODO confirm.
            reranked_documents = [x.strip().replace('"', "").replace("'", "").replace(" ", "-").lower() for x in reranked_documents]
            gold_document = line['conditions_title']
        except Exception as e:
            # Best effort: one failing example must not abort a long run, but
            # failures are counted and reported rather than silently dropped.
            failures += 1
            last_error = e
            continue
        tot += 1
        if gold_document in reranked_documents:
            ct += 1
        if tot % 50 == 0:
            print(f"k={k}, Current accuracy: {ct/tot}")
    if failures:
        print(f"k={k}, Skipped {failures} example(s) due to errors; last error: {last_error!r}")
    # Guard against every example failing (or empty data), which would
    # otherwise raise ZeroDivisionError.
    accuracy = ct / tot if tot else float("nan")
    print(f"k={k}, Reranking accuracy: {accuracy}")