### Midterm project for family caregivers

In [1]:
!pip install -qU langchain langchain_openai langchain-community langgraph arxiv langchain-text-splitters

In [2]:
!pip install -qU langchain-qdrant==0.2.0

In [3]:
!pip install -qU faiss-cpu python-pptx==1.0.2 nltk==3.9.1 pymupdf beautifulsoup4 lxml 

In [4]:
!pip install -qU tiktoken pymupdf==1.25.2

In [5]:
!pip install xmltodict
!pip install -qU ragas==0.2.10



In [6]:
!pip install rapidfuzz



In [7]:
import os
from getpass import getpass
os.environ["OPENAI_API_KEY"] = getpass("Please enter your OpenAI API key!")

In [8]:
os.environ["TAVILY_API_KEY"] = getpass("TAVILY_API_KEY")

In [9]:
from langchain_openai import ChatOpenAI

openai_chat_model = ChatOpenAI(model="gpt-4o-mini")

In [10]:
!mkdir data


mkdir: cannot create directory ‘data’: File exists


In [None]:
!curl -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" \
  https://www.ninds.nih.gov/health-information/disorders/amyotrophic-lateral-sclerosis-als -o data/overview.html
##!curl https://www.als.org/understanding-als/symptoms-diagnosis -o data/symotoms.html
##!curl https://www.mayoclinic.org/diseases-conditions/amyotrophic-lateral-sclerosis/symptoms-causes/syc-20354022 -o data/mayo-als.html

In [11]:
import os

print("Files in data/:", os.listdir("data/"))

Files in data/: ['FamilyCaregivers.pdf', 'overview.html']


In [12]:
from langchain_community.document_loaders import DirectoryLoader, BSHTMLLoader, PyPDFLoader

path = "data/"

# Load HTML files
html_loader = DirectoryLoader(path, glob="*.html", loader_cls=BSHTMLLoader)
html_docs = html_loader.load()

# Load PDF files
pdf_loader = DirectoryLoader(path, glob="*.pdf", loader_cls=PyPDFLoader)
pdf_docs = pdf_loader.load()

# Combine both document lists
docs = html_docs + pdf_docs


In [13]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 850,
    chunk_overlap  = 50,
    length_function = len
)
split_documents =  text_splitter.split_documents(docs)

In [14]:
print(len(split_documents))

73


In [15]:
from langchain_openai.embeddings import OpenAIEmbeddings

embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

In [16]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

client = QdrantClient(":memory:")

client.create_collection(
    collection_name="ai_caregiver",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name="ai_caregiver",
    embedding=embedding_model,
)



In [17]:
_ = vector_store.add_documents(documents=split_documents)

In [18]:
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

In [19]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever

def retrieve(state):
    retrieved_docs = retriever.invoke(state["question"])
    return {"context": retrieved_docs}

In [20]:
from langchain_core.prompts import PromptTemplate

RAG_template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say "I don't know, would you like to talk to a care coach?", don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.

{context}

Question: {question}

Helpful Answer:"""

rag_prompt = PromptTemplate.from_template(RAG_template) 

In [21]:
def generate(state):
    docs_content = "\n\n".join([doc.page_content for doc in state["context"]])
    messages = rag_prompt.format_prompt(context=docs_content, question=state["question"])
    respose = openai_chat_model.invoke(messages)
    return {"response": respose.content}

In [22]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain_core.documents import Document

class State(TypedDict):
  question: str
  context: List[Document]
  response: str

In [23]:
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve");
graph = graph_builder.compile();

In [24]:
response = graph.invoke({"question": "Who is more likely to get ALS?"})

In [25]:
response["response"];
print(response["response"])

Individuals between the ages of 55 and 75, especially men, are more likely to develop ALS. Whites and non-Hispanics are also at a higher risk, and military veterans may have an increased likelihood due to environmental exposure. However, most cases are sporadic and don't have a clear family history.


## RAGAS

In [26]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, ResponseRelevancy, ContextEntityRecall, NoiseSensitivity
from ragas import evaluate, RunConfig, EvaluationDataset


generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

from ragas.testset import TestsetGenerator

generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
dataset = generator.generate_with_langchain_docs(docs, testset_size=10)

Applying HeadlinesExtractor:   0%|          | 0/5 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/13 [00:00<?, ?it/s]

unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node


Applying SummaryExtractor:   0%|          | 0/9 [00:00<?, ?it/s]

Property 'summary' already exists in node '672ee0'. Skipping!
Property 'summary' already exists in node '95e9d3'. Skipping!
Property 'summary' already exists in node '96ff50'. Skipping!
Property 'summary' already exists in node '72a38b'. Skipping!


Applying CustomNodeFilter:   0%|          | 0/5 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/19 [00:00<?, ?it/s]

Property 'summary_embedding' already exists in node '672ee0'. Skipping!
Property 'summary_embedding' already exists in node '96ff50'. Skipping!
Property 'summary_embedding' already exists in node '95e9d3'. Skipping!
Property 'summary_embedding' already exists in node '72a38b'. Skipping!


Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/12 [00:00<?, ?it/s]

In [27]:
for test_row in dataset:
  response = graph.invoke({"question" : test_row.eval_sample.user_input})
  test_row.eval_sample.response = response["response"]
  test_row.eval_sample.retrieved_contexts = [context.page_content for context in response["context"]]

dataset.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,reference_contexts,response,reference,synthesizer_name
0,What is ALS and how does it affect the body?,[What is amyotrophic lateral sclerosis (ALS)?\...,[Amyotrophic Lateral Sclerosis (ALS) | Nationa...,Amyotrophic lateral sclerosis (ALS) is a progr...,"Amyotrophic lateral sclerosis (ALS), formerly ...",single_hop_specifc_query_synthesizer
1,how als get diagnosed?,"[In 2021, a team of scientists led by the NIH ...",[How is amyotrophic lateral sclerosis (ALS) di...,ALS is diagnosed through a physical exam and a...,ALS is diagnosed by a neurologist familiar wit...,single_hop_specifc_query_synthesizer
2,What are the racial and ethnic factors that in...,[Age—Although the disease can strike at any ag...,[Who is more likely to get amyotrophic lateral...,Whites and non-Hispanics are most likely to de...,Whites and non-Hispanics are most likely to de...,single_hop_specifc_query_synthesizer
3,What role does the NIH play in ALS research?,[NINDS is the primary federal funder of resear...,[What are the latest updates on amyotrophic la...,"The NIH, through its NINDS component, is the l...","The NIH, through its component NINDS, is the l...",single_hop_specifc_query_synthesizer
4,How is the diagnosis of Amyotrophic Lateral Sc...,[Scientific discoveries have resulted in the i...,[<1-hop>\n\nAmyotrophic Lateral Sclerosis (ALS...,Diagnosis of ALS is performed by a neurologist...,The diagnosis of Amyotrophic Lateral Sclerosis...,multi_hop_abstract_query_synthesizer
5,What are the diagnostic methods for Amyotrophi...,"[In 2021, a team of scientists led by the NIH ...",[<1-hop>\n\nAmyotrophic Lateral Sclerosis (ALS...,ALS is diagnosed primarily by a neurologist fa...,Diagnosing Amyotrophic Lateral Sclerosis (ALS)...,multi_hop_abstract_query_synthesizer
6,What are the diagnostic methods for Amyotrophi...,"[In 2021, a team of scientists led by the NIH ...",[<1-hop>\n\nAmyotrophic Lateral Sclerosis (ALS...,Amyotrophic Lateral Sclerosis (ALS) is diagnos...,Diagnosing Amyotrophic Lateral Sclerosis (ALS)...,multi_hop_abstract_query_synthesizer
7,What are the diagnostic methods for Amyotrophi...,"[In 2021, a team of scientists led by the NIH ...",[<1-hop>\n\nAmyotrophic Lateral Sclerosis (ALS...,Diagnosing ALS typically involves a neurologis...,Diagnosing Amyotrophic Lateral Sclerosis (ALS)...,multi_hop_abstract_query_synthesizer
8,How do the strategic priorities set by NINDS f...,[NINDS is the primary federal funder of resear...,[<1-hop>\n\nWhat are the latest updates on amy...,The strategic priorities set by NINDS for ALS ...,The strategic priorities set by NINDS for ALS ...,multi_hop_specific_query_synthesizer
9,Wht r the risk factrs for amyotrophic lateral ...,[A risk factor is a condition or behavior that...,[<1-hop>\n\nAmyotrophic Lateral Sclerosis (ALS...,Risk factors for amyotrophic lateral sclerosis...,The risk factors for amyotrophic lateral scler...,multi_hop_specific_query_synthesizer


In [28]:

evaluation_dataset = EvaluationDataset.from_pandas(dataset.to_pandas())


evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))
custom_run_config = RunConfig(timeout=360)

result = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness(), ResponseRelevancy(), ContextEntityRecall(), NoiseSensitivity()],
    llm=evaluator_llm,
    run_config=custom_run_config
)
result

Evaluating:   0%|          | 0/72 [00:00<?, ?it/s]

{'context_recall': 0.6458, 'faithfulness': 0.8343, 'factual_correctness': 0.5375, 'answer_relevancy': 0.9348, 'context_entity_recall': 0.3674, 'noise_sensitivity_relevant': 0.1031}

### TOOLS

In [29]:
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage

@tool
def ai_rag_tool(question: str) -> str:
    """Answer questions about ALS based on the retrieved documents. Input should be a fully formed question."""
    response = graph.invoke({"question": question})
    return{
        "messages": [HumanMessage(content=response["response"])],
        "context": response["context"],
    }

## AGENT

In [31]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.tools.arxiv.tool import ArxivQueryRun

tavily_tool = TavilySearchResults(max_results=5)

tool_belt = [
    tavily_tool,
    ai_rag_tool,
]

In [32]:
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4o", temperature=0)

model = model.bind_tools(tool_belt)

In [33]:
from typing import TypedDict, Annotated
from langgraph.graph.message import add_messages
import operator
from langchain_core.messages import BaseMessage
from langchain_core.documents import Document;


class AgentState(TypedDict):
  messages: Annotated[list, add_messages]

In [49]:
from langgraph.prebuilt import ToolNode

def call_model(state):
    messages = state["messages"]
    response = model.invoke(messages, config={"tool_choice": "auto"})  # Ensure it knows it can use tools
    return {"messages": [response]}

tool_node = ToolNode(tool_belt)

In [50]:
from langgraph.graph import StateGraph, END

uncompiled_graph = StateGraph(AgentState)

uncompiled_graph.add_node("agent", call_model)
uncompiled_graph.add_node("action", tool_node)

<langgraph.graph.state.StateGraph at 0x7fe32c16ead0>

In [51]:
uncompiled_graph.set_entry_point("agent")

<langgraph.graph.state.StateGraph at 0x7fe32c16ead0>

In [52]:
def should_continue(state):
    last_message = state["messages"][-1]
    print(f"Checking if model wants to call a tool: {last_message}")  # Debugging

    if hasattr(last_message, "tool_calls") and last_message.tool_calls:
        print(f"Model wants to call a tool: {last_message.tool_calls}")
        return "action"

    print("No tool calls detected, ending execution.")
    return END


### Add conditional edges to the graph
uncompiled_graph.add_conditional_edges(
    "agent",
    should_continue
)

<langgraph.graph.state.StateGraph at 0x7fe32c16ead0>

In [53]:
uncompiled_graph.add_edge("action", "agent")
compiled_graph = uncompiled_graph.compile()

In [54]:
from langchain_core.messages import HumanMessage

inputs = {"messages" : [HumanMessage(content="Who is more likely to get ALS?")]}

async for chunk in compiled_graph.astream(inputs, stream_mode="updates"):
    for node, values in chunk.items():
        print(f"Receiving update from node: '{node}'")
        
        if node == "action":
            # Check what the tool node is actually doing
            print(f"Tool node received: {values}")
            if "messages" in values and values["messages"]:
                print(f"Tool Used: {values['messages'][0].name}")
        
        print(values["messages"])
        print("\n\n")

Checking if model wants to call a tool: content='' additional_kwargs={'tool_calls': [{'id': 'call_aQDNiRQwtvz8ELjUBromCt93', 'function': {'arguments': '{"question":"Who is more likely to get ALS?"}', 'name': 'ai_rag_tool'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 121, 'total_tokens': 145, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_eb9dce56a8', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-c1cfa789-465f-49fe-9b01-2af3bfbdc0a5-0' tool_calls=[{'name': 'ai_rag_tool', 'args': {'question': 'Who is more likely to get ALS?'}, 'id': 'call_aQDNiRQwtvz8ELjUBromCt93', 'type': 'tool_call'}] usage_metadata={'input_tokens': 121, 'output_tokens': 24, 'total_tokens': 145, 'input_token_details': {'

In [40]:
from langchain_core.messages import HumanMessage

messages = [HumanMessage(content="Who is more likely to get ALS?")]
result = compiled_graph.invoke({"messages": messages})

result["messages"]

Checking if model wants to call a tool: content='' additional_kwargs={'tool_calls': [{'id': 'call_s7mOeEt4iImPFujKgWOyrv0w', 'function': {'arguments': '{"question":"Who is more likely to get ALS?"}', 'name': 'ai_rag_tool'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 121, 'total_tokens': 145, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_f9f4fb6dbf', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-8d9d18f7-97bf-4781-ae95-ffc9c206292d-0' tool_calls=[{'name': 'ai_rag_tool', 'args': {'question': 'Who is more likely to get ALS?'}, 'id': 'call_s7mOeEt4iImPFujKgWOyrv0w', 'type': 'tool_call'}] usage_metadata={'input_tokens': 121, 'output_tokens': 24, 'total_tokens': 145, 'input_token_details': {'

[HumanMessage(content='Who is more likely to get ALS?', additional_kwargs={}, response_metadata={}, id='734d7661-4c57-4855-972e-19e9cfc14398'),
 AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_s7mOeEt4iImPFujKgWOyrv0w', 'function': {'arguments': '{"question":"Who is more likely to get ALS?"}', 'name': 'ai_rag_tool'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 121, 'total_tokens': 145, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_f9f4fb6dbf', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-8d9d18f7-97bf-4781-ae95-ffc9c206292d-0', tool_calls=[{'name': 'ai_rag_tool', 'args': {'question': 'Who is more likely to get ALS?'}, 'id': 'call_s7mOeEt4iImPFujKgWOyrv0w', 'type':

## SDG

In [None]:
from ragas.integrations.langgraph import convert_to_ragas_messages

# Assuming 'result["messages"]' contains the list of LangChain messages
ragas_trace = convert_to_ragas_messages(result["messages"])
ragas_trace  # List of Ragas messages

In [None]:
from ragas.dataset_schema import MultiTurnSample
from ragas.metrics import AgentGoalAccuracyWithReference
from ragas.llms import LangchainLLMWrapper


sample = MultiTurnSample(
    user_input=ragas_trace,
    reference="Who is more likely to get ALS?",
)

scorer = AgentGoalAccuracyWithReference()

evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
scorer.llm = evaluator_llm
await scorer.multi_turn_ascore(sample)

In [None]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

In [None]:
from ragas.testset import TestsetGenerator

generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
dataset = generator.generate_with_langchain_docs(docs, testset_size=10)

In [47]:
dataset.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,reference_contexts,response,reference,synthesizer_name
0,What is ALS and how does it affect the body?,[What is amyotrophic lateral sclerosis (ALS)?\...,[Amyotrophic Lateral Sclerosis (ALS) | Nationa...,Amyotrophic lateral sclerosis (ALS) is a progr...,"Amyotrophic lateral sclerosis (ALS), formerly ...",single_hop_specifc_query_synthesizer
1,how als get diagnosed?,"[In 2021, a team of scientists led by the NIH ...",[How is amyotrophic lateral sclerosis (ALS) di...,ALS is diagnosed through a physical exam and a...,ALS is diagnosed by a neurologist familiar wit...,single_hop_specifc_query_synthesizer
2,What are the racial and ethnic factors that in...,[Age—Although the disease can strike at any ag...,[Who is more likely to get amyotrophic lateral...,Whites and non-Hispanics are most likely to de...,Whites and non-Hispanics are most likely to de...,single_hop_specifc_query_synthesizer
3,What role does the NIH play in ALS research?,[NINDS is the primary federal funder of resear...,[What are the latest updates on amyotrophic la...,"The NIH, through its NINDS component, is the l...","The NIH, through its component NINDS, is the l...",single_hop_specifc_query_synthesizer
4,How is the diagnosis of Amyotrophic Lateral Sc...,[Scientific discoveries have resulted in the i...,[<1-hop>\n\nAmyotrophic Lateral Sclerosis (ALS...,Diagnosis of ALS is performed by a neurologist...,The diagnosis of Amyotrophic Lateral Sclerosis...,multi_hop_abstract_query_synthesizer
5,What are the diagnostic methods for Amyotrophi...,"[In 2021, a team of scientists led by the NIH ...",[<1-hop>\n\nAmyotrophic Lateral Sclerosis (ALS...,ALS is diagnosed primarily by a neurologist fa...,Diagnosing Amyotrophic Lateral Sclerosis (ALS)...,multi_hop_abstract_query_synthesizer
6,What are the diagnostic methods for Amyotrophi...,"[In 2021, a team of scientists led by the NIH ...",[<1-hop>\n\nAmyotrophic Lateral Sclerosis (ALS...,Amyotrophic Lateral Sclerosis (ALS) is diagnos...,Diagnosing Amyotrophic Lateral Sclerosis (ALS)...,multi_hop_abstract_query_synthesizer
7,What are the diagnostic methods for Amyotrophi...,"[In 2021, a team of scientists led by the NIH ...",[<1-hop>\n\nAmyotrophic Lateral Sclerosis (ALS...,Diagnosing ALS typically involves a neurologis...,Diagnosing Amyotrophic Lateral Sclerosis (ALS)...,multi_hop_abstract_query_synthesizer
8,How do the strategic priorities set by NINDS f...,[NINDS is the primary federal funder of resear...,[<1-hop>\n\nWhat are the latest updates on amy...,The strategic priorities set by NINDS for ALS ...,The strategic priorities set by NINDS for ALS ...,multi_hop_specific_query_synthesizer
9,Wht r the risk factrs for amyotrophic lateral ...,[A risk factor is a condition or behavior that...,[<1-hop>\n\nAmyotrophic Lateral Sclerosis (ALS...,Risk factors for amyotrophic lateral sclerosis...,The risk factors for amyotrophic lateral scler...,multi_hop_specific_query_synthesizer


### Context Parsing

In [55]:
import pandas as pd
# Convert existing RAGAS dataset to pandas DataFrame
existing_df = dataset.to_pandas()

# Store new evaluation data (retriever + generator)
full_evaluation_logs = []

# Iterate through each row in the dataset
for index, row in existing_df.iterrows():
    user_query = row["user_input"]  # Extract user query

    # Run LangGraph for this query
    initial_state = {"messages": [HumanMessage(content=user_query)]}
    final_state = compiled_graph.invoke(initial_state)

    # Extract relevant outputs
    retrieved_context = final_state.get("context", [])  # Tool retrieved context
    model_response = final_state["messages"][-1].content  # Final LLM response

    # Append to full evaluation logs
    full_evaluation_logs.append({
        "user_input": user_query,
        "retrieved_context": retrieved_context,
        "response": model_response
    })

# Convert to DataFrame
full_evaluation_df = pd.DataFrame(full_evaluation_logs)

Checking if model wants to call a tool: content='' additional_kwargs={'tool_calls': [{'id': 'call_PUZWFqOSt32H8s5tHsRejZrp', 'function': {'arguments': '{"question":"What is ALS and how does it affect the body?"}', 'name': 'ai_rag_tool'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 124, 'total_tokens': 151, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_eb9dce56a8', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-3560c75c-8811-4044-92e0-531c533f201f-0' tool_calls=[{'name': 'ai_rag_tool', 'args': {'question': 'What is ALS and how does it affect the body?'}, 'id': 'call_PUZWFqOSt32H8s5tHsRejZrp', 'type': 'tool_call'}] usage_metadata={'input_tokens': 124, 'output_tokens': 27, 'total_tokens': 15

In [59]:
merged_dataset = pd.concat([existing_df, full_evaluation_df], axis=1)

In [61]:
# Run RAGAS evaluation
result = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness(), ResponseRelevancy(), ContextEntityRecall(), NoiseSensitivity()],
    llm=evaluator_llm,
    run_config=custom_run_config
)
result

Evaluating:   0%|          | 0/72 [00:00<?, ?it/s]

Exception raised in Job[53]: TimeoutError()


{'context_recall': 0.6292, 'faithfulness': 0.8435, 'factual_correctness': 0.5450, 'answer_relevancy': 0.9332, 'context_entity_recall': 0.3760, 'noise_sensitivity_relevant': 0.1215}