# Evaluation

Ragas reference documentation:

https://docs.ragas.io/en/stable/index.html

In [1]:
from dotenv import load_dotenv
import os
import sys
import logging

from llama_index.core import StorageContext, load_index_from_storage

from llama_index.core.retrievers import VectorIndexRetriever

from llama_index.core import Settings

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

from llama_index.core.data_structs import Node
from llama_index.core.schema import NodeWithScore
from llama_index.core import get_response_synthesizer


import openai

import nest_asyncio
nest_asyncio.apply()

import logging
import sys

# basicConfig(stream=...) already attaches a stdout StreamHandler to the root
# logger. Adding a second StreamHandler by hand makes every log record print
# more than once, so only basicConfig is used here.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

from llama_index.llms.openai import OpenAI
from IPython.display import Markdown, display

from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import QueryBundle
from llama_index.postprocessor.rankgpt_rerank import RankGPTRerank

import pandas as pd
from IPython.display import display, HTML


# Read OPENAI_API_KEY (and any other variables) from a local .env file.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Kept as a module-level name because get_retrieved_nodes passes it to the
# reranker's LLM.
OPENAI_API_KEY = openai.api_key

# Default LLM and embedding model used by LlamaIndex components in this notebook.
Settings.llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")

# Logging is already configured earlier in this cell. Calling basicConfig again
# is a no-op once the root logger has a handler, and adding another
# StreamHandler only duplicates every log line, so it is not repeated here.


# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2




In [2]:
# Point a storage context at the directory the index was persisted to.
storage_context = StorageContext.from_defaults(
    persist_dir="./persist",
)

# Rehydrate the persisted index; get_retrieved_nodes reads `index` as a global.
index = load_index_from_storage(storage_context=storage_context)



def get_retrieved_nodes(
    query_str, vector_top_k=10, reranker_top_n=3, with_reranker=False
):
    """Retrieve nodes for a query from the global ``index``, optionally reranked.

    Args:
        query_str: Query text; wrapped in a ``QueryBundle`` before retrieval.
        vector_top_k: Passed to the vector retriever as ``similarity_top_k``.
        reranker_top_n: Passed to ``RankGPTRerank`` as ``top_n``; only used
            when ``with_reranker`` is true.
        with_reranker: When true, the retrieved nodes are post-processed by a
            ``RankGPTRerank`` backed by a ``gpt-3.5-turbo`` LLM.

    Returns:
        The retriever's nodes, or the reranker's output when reranking is on.
    """
    bundle = QueryBundle(query_str)

    # First stage: vector retrieval.
    candidates = VectorIndexRetriever(
        index=index,
        similarity_top_k=vector_top_k,
    ).retrieve(bundle)

    if not with_reranker:
        return candidates

    # Second stage: LLM-based reranking of the retrieved candidates.
    rerank_llm = OpenAI(
        model="gpt-3.5-turbo",
        temperature=0.0,
        api_key=OPENAI_API_KEY,
    )
    reranker = RankGPTRerank(llm=rerank_llm, top_n=reranker_top_n, verbose=True)
    return reranker.postprocess_nodes(candidates, bundle)



# Smoke test: retrieve 30 candidates, rerank with top_n=5, then synthesize an
# answer from the reranked nodes.
query = "What are potential CONSECUENCES of keytruda?"
retrieved_nodes = get_retrieved_nodes(
    query, vector_top_k=30, reranker_top_n=5, with_reranker=True
)

response_synthesizer = get_response_synthesizer(response_mode="compact")
response = response_synthesizer.synthesize(query, nodes=retrieved_nodes)

response

INFO:llama_index.core.indices.loading:Loading all indices.
Loading all indices.
Loading all indices.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
After Reranking, new rank list for nodes: [0, 6, 3, 1, 2, 7, 8, 25, 13, 23, 27, 9, 24, 14, 26, 15, 16, 20, 18, 19, 21, 28, 17, 10, 4, 5, 11, 12, 29, 22]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Response(response='Potential consequences of Keytruda include immune-related adverse effects such as colitis, hepatitis, pneumonitis, and exacerbation of pre-existing autoimmune diseases. It is important for patients to monitor for symptoms like cough, shortness of breath, chest pain, and changes in vision while on Keytruda treatment to ensure optimal care and safety.', source_nodes=[NodeWithScore(node=TextNode(id_='1c86f3e8-53f4-417a-9c1b-81fb42255e97', embedding=None, metadata={'document_title': 'Keytruda and Immune-Related Adverse Effects: A Comprehensive Overview'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='49de588d-beb9-4e7e-84dc-06ffb2e63a2d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='c73be1b4e2cb5e6157fd80000f0fe59f6ca8631fa4924c5ed0be7dd3180c3218')}, text='Can Keytruda cause immune-related adverse effects? Yes, Keytruda can cause immune-related adverse effects such as co

In [3]:
from llama_index.core.query_engine import CustomQueryEngine
from llama_index.core.retrievers import BaseRetriever
from llama_index.core import get_response_synthesizer
from llama_index.core.response_synthesizers import BaseSynthesizer

class RAGQueryEngine(CustomQueryEngine):
    """Query engine that retrieves nodes and synthesizes a response from them.

    Retrieval is delegated to the notebook-level ``get_retrieved_nodes``
    helper. The retrieval settings are exposed as fields so that variants with
    and without reranking can be built without editing this class; the defaults
    reproduce the values that were previously hard-coded in ``custom_query``.
    """

    # Synthesizer that turns the query and the retrieved nodes into a response.
    response_synthesizer: BaseSynthesizer
    # Forwarded to get_retrieved_nodes as ``vector_top_k``.
    vector_top_k: int = 30
    # Forwarded to get_retrieved_nodes as ``reranker_top_n``.
    reranker_top_n: int = 5
    # Forwarded to get_retrieved_nodes as ``with_reranker``.
    with_reranker: bool = False

    def custom_query(self, query_str: str):
        """Retrieve nodes for ``query_str`` and synthesize a response.

        Args:
            query_str: The query text.

        Returns:
            The object returned by ``response_synthesizer.synthesize``.
        """
        nodes = get_retrieved_nodes(
            query_str,
            vector_top_k=self.vector_top_k,
            reranker_top_n=self.reranker_top_n,
            with_reranker=self.with_reranker,
        )
        return self.response_synthesizer.synthesize(query_str, nodes)
    
# Build the engine around a "compact" response synthesizer and run one
# smoke-test query through it.
synthesizer = get_response_synthesizer(response_mode="compact")
query_engine = RAGQueryEngine(response_synthesizer=synthesizer)

response = query_engine.query(
    "What are potential CONSECUENCES of keytruda?"
)
response


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Response(response='Potential consequences of Keytruda include immune-related adverse effects such as colitis, hepatitis, and pneumonitis, as well as changes in blood pressure, interactions with steroids and immunosuppressants, and the need for close monitoring for specific symptoms like cough, chest pain, and changes in vision. Additionally, Keytruda may lead to exacerbation of pre-existing autoimmune diseases and require discontinuation in cases of significant disease progression or unacceptable toxicity.', source_nodes=[NodeWithScore(node=TextNode(id_='1c86f3e8-53f4-417a-9c1b-81fb42255e97', embedding=None, metadata={'document_title': 'Keytruda and Immune-Related Adverse Effects: A Comprehensive Overview'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='49de588d-beb9-4e7e-84dc-06ffb2e63a2d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='c73be1b4e2cb5e6157fd80000f0fe59f6ca8631fa4924c5ed0

# Evaluation

In [49]:
# Load the evaluation set; its `augmented_questions` column is used below as
# the query text.
df = pd.read_parquet("../data/processed_data/df_eval.parquet")
# Print column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220 entries, 0 to 219
Data columns (total 23 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Unnamed: 0                 220 non-null    int64         
 1   Country                    220 non-null    object        
 2   Requester_Type             220 non-null    object        
 3   Product                    220 non-null    object        
 4   Indication                 220 non-null    object        
 5   Question                   220 non-null    object        
 6   Channel                    220 non-null    object        
 7   Date_Time_Open             220 non-null    datetime64[ns]
 8   Date_Time_Closed           220 non-null    datetime64[ns]
 9   Answer_Solution            220 non-null    object        
 10  Duration                   220 non-null    int64         
 11  Day_of_Week                220 non-null    object        
 12  Week_Num

In [50]:
# Sanity-check the engine on the evaluation question stored under index label 0
# before running the full evaluation.
response_vector = query_engine.query(
    df.loc[0, "augmented_questions"]
)

response_vector

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Response(response='Common adverse reactions associated with Keytruda include fatigue, nausea, skin rash, colitis, hepatitis, pneumonitis, cough, chest pain, changes in vision, itching, diarrhea, and fluctuations in blood pressure.', source_nodes=[NodeWithScore(node=TextNode(id_='1c86f3e8-53f4-417a-9c1b-81fb42255e97', embedding=None, metadata={'document_title': 'Keytruda and Immune-Related Adverse Effects: A Comprehensive Overview'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='49de588d-beb9-4e7e-84dc-06ffb2e63a2d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='c73be1b4e2cb5e6157fd80000f0fe59f6ca8631fa4924c5ed0be7dd3180c3218')}, text='Can Keytruda cause immune-related adverse effects? Yes, Keytruda can cause immune-related adverse effects such as colitis, hepatitis, and pneumonitis.', mimetype='text/plain', start_char_idx=0, end_char_idx=150, text_template='[Excerpt from document]\n{met

In [51]:
# Re-display the response object assigned to `response_vector` above.
response_vector

Response(response='Common adverse reactions associated with Keytruda include fatigue, nausea, skin rash, colitis, hepatitis, pneumonitis, cough, chest pain, changes in vision, itching, diarrhea, and fluctuations in blood pressure.', source_nodes=[NodeWithScore(node=TextNode(id_='1c86f3e8-53f4-417a-9c1b-81fb42255e97', embedding=None, metadata={'document_title': 'Keytruda and Immune-Related Adverse Effects: A Comprehensive Overview'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='49de588d-beb9-4e7e-84dc-06ffb2e63a2d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='c73be1b4e2cb5e6157fd80000f0fe59f6ca8631fa4924c5ed0be7dd3180c3218')}, text='Can Keytruda cause immune-related adverse effects? Yes, Keytruda can cause immune-related adverse effects such as colitis, hepatitis, and pneumonitis.', mimetype='text/plain', start_char_idx=0, end_char_idx=150, text_template='[Excerpt from document]\n{met

In [52]:
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
#from ragas.metrics.critique import harmfulness

# Metrics scored in this run. context_precision, context_recall and
# harmfulness are not part of this run.
metrics = [faithfulness, answer_relevancy]

In [53]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# LLM handed to ragas `evaluate` as its `llm` argument.
# Using GPT-3.5 here; use GPT-4 / GPT-4-turbo for better accuracy.
evaluator_llm = OpenAI(model="gpt-3.5-turbo")

In [54]:
# Keep only the question text, exposed under the `question` column name.
ds = df[["augmented_questions"]].rename(
    columns={"augmented_questions": "question"}
)

# Column-oriented dict holding the questions as a plain Python list.
ds_dict = {"question": ds["question"].tolist()}
ds_dict["question"]

['What are the frequently encountered adverse reactions associated with Keytruda?',
 'Which side effects are typically seen in patients receiving Keytruda therapy?',
 'What are the usual side effects that individuals may experience when using Keytruda?',
 'How common are the side effects reported with Keytruda treatment?',
 'What are the expected side effects that patients may encounter while taking Keytruda?',
 'Are immune-related adverse effects a potential risk associated with Keytruda?',
 'Can Keytruda lead to adverse effects related to the immune system?',
 'What immune-related adverse effects may arise from Keytruda treatment?',
 'Is there a possibility of immune-related side effects occurring with Keytruda use?',
 'What immune system complications can be caused by Keytruda?',
 'What is the safety profile of Keytruda in pregnant women?',
 'Are there any risks associated with using Keytruda during pregnancy?',
 'Is Keytruda considered safe for pregnant patients based on current me

In [55]:
from datasets import Dataset

# Rebuild the {"question": [...]} mapping here so this cell does not depend on
# the display cell above having been run.
ds_dict = {"question": ds["question"].tolist()}

# Wrap the mapping in a `datasets.Dataset` for ragas.
dataset = Dataset.from_dict(ds_dict)
dataset

Dataset({
    features: ['question'],
    num_rows: 220
})

In [None]:


from ragas.integrations.llama_index import evaluate

# Run every question in `dataset` through `query_engine` and score the
# responses on the selected metrics.
result = evaluate(
    query_engine=query_engine,
    dataset=dataset,
    metrics=metrics,
    llm=evaluator_llm,
    embeddings=OpenAIEmbedding(),
)

# Persist the per-question scores so they can be inspected without re-running
# the evaluation.
df_result = result.to_pandas()
df_result.to_parquet(
    "../data/processed_data/df_result_ragas_without_reranking.parquet"
)

df_result

In [57]:
# without reranking
# Summary statistics of the metric columns currently held in `df_result`.
df_result.describe()

Unnamed: 0,faithfulness,answer_relevancy
count,218.0,220.0
mean,0.831684,0.859019
std,0.29759,0.301806
min,0.0,0.0
25%,0.666667,0.94917
50%,1.0,0.964384
75%,1.0,0.974852
max,1.0,1.0


In [45]:
# with reranking
# NOTE(review): this cell runs the same code as the "without reranking" cell and
# carries an earlier execution count (45), so the numbers shown presumably come
# from a previous run in which reranking was enabled — TODO confirm and make
# this reproducible from a saved result.
df_result.describe()

Unnamed: 0,faithfulness,answer_relevancy
count,219.0,220.0
mean,0.793747,0.89062
std,0.314552,0.259305
min,0.0,0.0
25%,0.5,0.949299
50%,1.0,0.966063
75%,1.0,0.978517
max,1.0,1.0
