In [1]:
from llama_index.embeddings.nomic import NomicEmbedding
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    Document,
    get_response_synthesizer,
)
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from typing import Annotated
import os
import re 
import datetime

In [2]:
documents = SimpleDirectoryReader("./data/split/").load_data()
nomic_api_key = os.environ.get("NOMIC_API_KEY")
embed_model = NomicEmbedding(
    api_key=nomic_api_key,
    model_name="nomic-embed-text-v1",
    task_type="search_document"
)
service_context = ServiceContext.from_defaults(
    embed_model=embed_model, chunk_size=1024,
)
index = VectorStoreIndex.from_documents(
    documents=documents, service_context=service_context, show_progress=True
)


  service_context = ServiceContext.from_defaults(


Parsing nodes:   0%|          | 0/3 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/2048 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/86 [00:00<?, ?it/s]

In [3]:
embed_model = NomicEmbedding(
    api_key=nomic_api_key,
    model_name="nomic-embed-text-v1",
    task_type="search_query"
)

service_context = ServiceContext.from_defaults(
    embed_model=embed_model
)

index = VectorStoreIndex.from_documents(documents, embedding_model=embed_model)
#search_query_retriever = index.as_retriever(service_context=service_context, similarity_top_k=2)

  service_context = ServiceContext.from_defaults(


In [4]:
# configure retriever
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=3,
)

# configure response synthesizer
response_synthesizer = get_response_synthesizer()

# assemble query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
)

In [5]:
response = query_engine.query("What is the effectiveness of alternative medicine?")
print(response)

The effectiveness of alternative medicine, specifically high potencies of the homeopathic remedy HgCl2 (Mercurius corrosivus), was tested on two sugar hydrolases in a study. The results showed that an inhibiting effect of HgCl2 on enzyme activities was observed only in low potencies and dilutions, with no statistically significant differences found between potencies and controls in randomized and blinded experiments.


# Mistral Eval + RAG

In [6]:
import json
import os
import matplotlib.pyplot as plt
import glob
from openai import OpenAI
import requests
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np

In [7]:
from dotenv import load_dotenv
load_dotenv()
open_api_key = os.getenv("OPENAI_API_KEY")

In [8]:
def prompt(payload):
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {open_api_key}"
    }

    try:
        resp = requests.post("https://api.openai.com/v1/chat/completions", 
                             headers=headers, json=payload)
        data = resp.json()
        if 'choices' in data and data['choices']:
            resp_dict = data['choices'][0]['message']['content']
            return resp_dict
        else:
            print("Unexpected response structure:", data)
            return None
    
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except requests.exceptions.RequestException as err:
        print(f"An error occurred: {err}")
    except KeyError as key_err:
        print(f"Key error: {key_err} - the structure of the response might not be as expected.")
    
    return None

In [9]:
# load the data as current dir (one level up) + data folder + file 
data = json.load(open('./data/test_set.json'))
labels = json.load(open('./data/test_ground_truth.json'))
len(data), len(labels)

(500, 500)

In [44]:
# for idx, question in data.items():
#     print(idx, question)

In [39]:
model = "gpt-4-0125-preview"
#model = "gpt-3.5-turbo"
run = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

In [12]:
results = {}
for i, qa in data.items():
    question = qa["QUESTION"]
    context = qa["CONTEXTS"]
    context_labels = qa["LABELS"]
    meshes = qa["MESHES"]
    year = qa["YEAR"]

    system_prompt = "You are helpful doctor from Harvard with expertise in searching medically relevant information in VectorDatabases given a question, context, labels, meshes and year provided by the user. Your goal is to provide the user with a medically relevant and related question to search through a medical vectorDB that would better help them decide about the answer to the original one. Your response MUST be a question. It will be used to search for in a similar qa database in order to obtain additional data that could better inform your answer."
    query_prompt = f"Help me come up with a question that helps me answer the following question: {question}, with a response of (yes / no/ maybe) given the context: {context}, the labels: {context_labels}, the meshes: {meshes}, and the year: {year}."

    query_data = {
        "model": model, 
        "temperature": 0.1,
        "messages": [
            {
                "role": "system",
                "content": [
                    {"type": "text", "text": system_prompt},
                ],
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": query_prompt},
                ],
            }
        ],
        "max_tokens": 240,
    }

    synthetic_query = prompt(query_data)
    synthetic_context = query_engine.query(synthetic_query if synthetic_query else question)

    results[i] = {
        "original_question": question,
        "synthetic_query": synthetic_query,
        "synthetic_context": synthetic_context
    }


OSError: [Errno 30] Read-only file system: '/rag'

In [20]:
# get current directory
current_dir = os.getcwd()
# Ensure the directory exists as current dir + rag folder
directory_path = f"{current_dir}/rag/{model}"
if not os.path.exists(directory_path):
    os.makedirs(directory_path)

# Save the results with the run timestamp appended to the filename
file_path = f"{directory_path}/query_results_{run}.json"
with open(file_path, 'w') as file:
    json.dump(results, file)

TypeError: Object of type Response is not JSON serializable

In [40]:
predictions, gt = {}, {}

class_dict = { 'no': 0, 'yes': 1, 'maybe': 2}

# Load synthetic queries and contexts from saved JSON
# with open(file_path, 'r') as file:
    # synthetic_data = json.load(file)
synthetic_data = results
total_items = len(data)

for i, qa in data.items():

    synthetic_query = synthetic_data[str(i)]["synthetic_query"]
    synthetic_context = synthetic_data[str(i)]["synthetic_context"]

    # Create the prompt using the combined context and the current question
    text_prompt = f"Answer the question: {question}. Given the original context: {context}. My previous doctor asked the following question {synthetic_query} to a medically relevant database and shared this complementary context {synthetic_context} that can be used to better inform your answer. The answer can only be one word and it should be either 'yes', 'no', or 'maybe'."
    #text_prompt = f"Answer the question {question} given context: {context}. Answer can only be one word and it should be either 'yes', 'no', or 'maybe'."
    
    prompt_data = {
        "model": model, 
        "temperature": 0.1,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text_prompt},
                ],
            }
        ],
        "max_tokens": 10,
    }
    
    resp = prompt(prompt_data)
    #clean the respose for any non-alphabetical characters:
    resp = re.sub(r'[^a-zA-Z]', '', resp).strip()
    
    try:
        if not resp:
            original_question = data[i]["QUESTION"]
            synthetic_question = query_prompt  # Assuming query_prompt is accessible here
            expected_answer = labels[i]  # Assuming labels[i] is accessible here
            raise ValueError(f"No response received for query of id{i}. Original Question: {original_question}, Synthetic Question: {synthetic_question}, Expected Answer: {expected_answer}")

        predictions.update({i: class_dict[resp.lower()]})
        gt.update({i: class_dict[labels[i]]})
        correct = 1 if resp.lower() == labels[i].lower() else 0

    except KeyError as e:
        original_question = data[i]["QUESTION"]
        synthetic_question = query_prompt  # Assuming query_prompt is accessible here
        synthetic_response = synthetic_query  # Assuming synthetic_query is accessible here
        actual_response = resp
        print(f"KeyError processing query of id {i}: {e}")
        print(f"Original Question: {original_question}")
        print(f"Synthetic Question: {synthetic_question}")
        print(f"Synthetic Response: {synthetic_response}")
        print(f"Actual Response: {actual_response}")

    except ValueError as ve:
        print(ve)

In [41]:
len(predictions), len(gt)

(500, 500)

In [42]:
accuracy_score(list(gt.values()), list(predictions.values() ))

0.552

In [43]:
f1_score(list(gt.values()), list(predictions.values()), average='macro')

0.2371134020618557