# Configuration

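In this tutorial, we will use GPT-3.5 and GPT-4 through the Azure OpenAI (AOAI) API due to its HIPAA compliance. Note that the code cells below are configured for the OpenAI-compatible NVIDIA API endpoint instead; an Azure OpenAI client setup is shown as a commented-out alternative.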

In [None]:
# install the openai package
!pip install openai

# For biomedical RAG with custom corpora / retriever / LLMs, please use our MedRAG toolkit: https://github.com/Teddy-XiongGZ/MedRAG (not covered in this tutorial)

In [None]:
from openai import OpenAI
import json
import os

# The API key is only valid during the tutorial, so it is read from the
# NVIDIA_API_KEY environment variable rather than being stored in the notebook.
# The "nvapi-" fallback keeps the original placeholder when the variable is unset.
# https://build.nvidia.com/nvidia/nemotron-4-340b-instruct
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ.get("NVIDIA_API_KEY", "nvapi-"),
)

# Alternative: an Azure OpenAI client configured entirely from the environment
# (this additionally requires `from openai import AzureOpenAI`)
# client = AzureOpenAI(
#     api_version="2023-09-01-preview",
#     azure_endpoint=os.getenv("OPENAI_ENDPOINT"),
#     api_key=os.getenv("OPENAI_API_KEY"),
# )

# Retrieval Utilities (Corpus = PubMed; Retriever = Best Match)

In [None]:
import requests
import json
import time
from xml.etree import ElementTree

def retrieve_pubmed(query, retmax=5):
    """Search PubMed through the NCBI E-utilities `esearch` endpoint.

    Args:
        query: Free-text search term sent as the `term` parameter.
        retmax: Maximum number of PMIDs to request (default 5).

    Returns:
        The `idlist` of the JSON search result (PMIDs), sorted by relevance.

    Raises:
        Exception: If the endpoint does not answer with HTTP 200.
        KeyError: If the JSON payload has no `esearchresult` / `idlist` entry.
    """
    # for the rate limit
    time.sleep(1/3)

    esearch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + "esearch.fcgi"

    params = {
        "db": "pubmed",
        "term": query,
        "retmax": retmax,
        "retmode": "json",
        "sort": "relevance",
    }

    # A timeout keeps the notebook cell from hanging forever on a stalled connection
    response = requests.get(esearch_url, params=params, timeout=30)

    # Fail loudly on HTTP errors (same style as the BioC helper) instead of
    # trying to decode an error page as JSON
    if response.status_code != 200:
        raise Exception("Failed to retrieve data from E-utilities esearch")

    data = response.json()

    return data["esearchresult"]["idlist"]

def get_pubmed_titles_abstracts_json(pmids):
    """Fetch titles and abstracts for the given PMIDs from the NCBI BioC API.

    Args:
        pmids: Iterable of PubMed IDs (strings or integers).

    Returns:
        A list of `{"title": ..., "abstract": ...}` dicts in the order of the
        input PMIDs. PMIDs missing from the response are skipped; a missing
        title or abstract is returned as an empty string.

    Raises:
        Exception: If the BioC API does not answer with HTTP 200.
    """
    # Normalise to strings so integer PMIDs can be joined and matched below
    pmid_strs = [str(pmid) for pmid in pmids]

    # Nothing to look up: an empty ID list would only produce a malformed URL
    if not pmid_strs:
        return []

    # for the rate limit
    time.sleep(1/3)

    bioc_url = "https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pubmed.cgi/BioC_json/"
    pmids_str = ",".join(pmid_strs)

    # A timeout keeps the notebook cell from hanging forever on a stalled connection
    response = requests.get(f"{bioc_url}{pmids_str}/unicode", timeout=30)

    if response.status_code != 200:
        raise Exception("Failed to retrieve data from BioC API")

    bioc_data = response.json()

    # Index every returned document by its PMID
    articles_dict = {}
    for entry in bioc_data:
        for document in entry["documents"]:
            title = ""
            abstract = ""

            for passage in document["passages"]:
                # Passages without type information are ignored rather than crashing
                passage_type = passage.get("infons", {}).get("type")
                if passage_type == "title":
                    title = passage["text"]
                elif passage_type == "abstract":
                    abstract = passage["text"]

            articles_dict[str(document["id"])] = {"title": title, "abstract": abstract}

    # Ensure the order matches the input PMIDs
    return [articles_dict[pmid] for pmid in pmid_strs if pmid in articles_dict]

# Quick check of the two retrieval helpers: search for PMIDs, then fetch
# and pretty-print the matching titles and abstracts.
# (Swap in a short query such as "diabetes" to try a different search.)
demo_query = "Do preoperative statins reduce atrial fibrillation after coronary artery bypass grafting?"
pmids = retrieve_pubmed(demo_query)
print(json.dumps(pmids))

contents = get_pubmed_titles_abstracts_json(pmids)
print(json.dumps(contents, indent=4))

# 5 minutes coding

# Dataset

In [None]:
# Yes/no questions sampled from PubMedQA (first 6) and BioASQ (last 6)
# Jin et al., PubMedQA: A Dataset for Biomedical Research Question Answering
# Krithara et al., BioASQ-QA: A manually curated corpus for Biomedical Question Answering
_QA_PAIRS = [
    ("Is hypoalbuminemia an independent prognostic factor in patients with gastric cancer?", "no"),
    ("Are endothelial cell patterns of astrocytomas indicative of grade?", "yes"),
    ("Are the long-term results of the transanal pull-through equal to those of the transabdominal pull-through?", "no"),
    ("Is adjustment for reporting heterogeneity necessary in sleep disorders?", "no"),
    ("Is halofantrine ototoxic?", "yes"),
    ("Do mutations causing low HDL-C promote increased carotid intima-media thickness?", "no"),
    ("Can losartan reduce brain atrophy in Alzheimer's disease?", "no"),
    ("Is PRP-40 regulation of microexons a conserved phenomenon?", "yes"),
    ("Does CIDEB mutation protect from liver disease?", "yes"),
    ("Is levosimendan effective for amyotrophic lateral sclerosis?", "no"),
    ("Is Iron deficiency anemia a common complication of chronic kidney disease?", "yes"),
    ("Is Zanubrutinib a first-generation BTK inhibitor approved by US Food and Drug Administration (FDA)?", "no"),
]

# Full sample, kept as a backup of all twelve instances
datasetBK = [{"question": text, "answer": label} for text, label in _QA_PAIRS]


# Start by testing with a small subset (currently only the first question);
# the entry is copied so that `dataset` stays independent of `datasetBK`
dataset = [dict(datasetBK[0])]


# Running Baseline (Direct Prompting)

In [None]:
from sklearn.metrics import accuracy_score

# Direct-prompting baseline: ask each question as-is and compare the yes/no
# answer with the ground truth.
# Add "microsoft/phi-3-medium-4k-instruct" to the list to compare a second model.
for model in ["nvidia/nemotron-4-340b-instruct"]:

    preds = []
    answers = []

    for entry in dataset:
        question = entry["question"]

        # it should be noted that normally you should ask the model to think step-by-step
        messages = [
            {"role": "system", "content": "You are a helpful assistant for answering biomedical questions. Please only output \"yes\" or \"no\"."},
            {"role": "user", "content": question}
        ]
        print(messages)
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            # near-zero temperature for (almost) deterministic answers;
            # presumably non-zero because the endpoint may reject 0 - TODO confirm
            temperature=0.0002,
        )

        # most important output is the response (i.e., assistant) message;
        # `content` may be None, so fall back to an empty string
        response_msg = response.choices[0].message.content or ""
        # Strip surrounding whitespace as well as periods so that replies such as
        # "Yes.\n" or " no" still match the "yes"/"no" labels
        prediction = response_msg.strip().lower().strip(".").strip()
        ground_truth = entry["answer"]
        preds.append(prediction)
        answers.append(ground_truth)
        print('prediction:')
        print(prediction)
        print('answer:')
        print(ground_truth)

    # Per-model summary over all evaluated questions
    print(model)
    print("Accuracy: ", accuracy_score(answers, preds))

# 5 minutes coding

# Running RAG

In [None]:
from sklearn.metrics import accuracy_score

# Retrieval-augmented run: retrieve PubMed articles for each question, put them
# into the prompt and compare the yes/no answer with the ground truth.
# Add "microsoft/phi-3-medium-4k-instruct" to the list to compare a second model.
for model in ["nvidia/nemotron-4-340b-instruct"]:

    preds = []
    answers = []

    for entry in dataset:
        question = entry["question"]

        # RAG-specific. First step is to find relevant articles
        # sometimes you can use LLMs to re-write the question and do the search
        # here we just use the raw question for the search
        pmids = retrieve_pubmed(question)
        contents = get_pubmed_titles_abstracts_json(pmids)

        # Build the prompt: retrieved materials first, then the question itself
        rag_prompt = "Here are the relevant materials:\n"

        for article in contents:
            rag_prompt += "Title: " + article["title"] + "\n"
            rag_prompt += "Abstract: " + article["abstract"] + "\n"

        rag_prompt += "Please answer this question based on the provided materials:\n"
        rag_prompt += question

        messages = [
            {"role": "system", "content": "You are a helpful assistant for answering biomedical questions. Please only output \"yes\" or \"no\"."},
            {"role": "user", "content": rag_prompt}
        ]

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            # near-zero temperature for (almost) deterministic answers;
            # presumably non-zero because the endpoint may reject 0 - TODO confirm
            temperature=0.0002,
        )

        # most important output is the response (i.e., assistant) message;
        # `content` may be None, so fall back to an empty string
        response_msg = response.choices[0].message.content or ""
        # Strip surrounding whitespace as well as periods so that replies such as
        # "Yes.\n" or " no" still match the "yes"/"no" labels
        prediction = response_msg.strip().lower().strip(".").strip()
        ground_truth = entry["answer"]

        preds.append(prediction)
        answers.append(ground_truth)
        print('prediction:')
        print(prediction)
        print('answer:')
        print(ground_truth)

    # Per-model summary over all evaluated questions
    print(model)
    print("Accuracy: ", accuracy_score(answers, preds))

# 5 minutes coding

# Assignment

In [None]:
# Prompt for the assignment: an open-ended question whose text asks for each
# listed symptom to be accompanied by the PMID of its source.
question = "Please list the most common symptoms associated with the long-term sequelae of COVID-19. Each symptom should list the source PMID."

# Contact
If you have any questions, you are welcome to contact [Dr. Qiao Jin](https://andy-jqa.github.io/) at qiao.jin@nih.gov.