# Ragas evaluation
Test batch and ragas capability.

Uses this article as a model: https://towardsdatascience.com/visualize-your-rag-data-evaluate-your-retrieval-augmented-generation-system-with-ragas-fc2486308557

Ragas repository: https://github.com/explodinggradients/ragas/tree/main

In [27]:
import os, sys
import json
from pathlib import Path
import pickle

from ragas.testset import TestsetGenerator
from ragas import RunConfig
from dotenv import load_dotenv,find_dotenv
import chromadb
from chromadb import PersistentClient
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.documents import Document
import pandas as pd
import random

from ragas import evaluate
from ragas.metrics import answer_correctness
from datasets import Dataset

# Import local packages
sys.path.append('../src/aerospace_chatbot')
import queries
from data_processing import _stable_hash_meta

# Set environment variables with .env
load_dotenv(find_dotenv(), override=True)

True

## Nifty functions

In [2]:
def write_dict_to_file(data_dict, filename):
    """write a dictionary as a json line to a file - allowing for appending"""
    with open(filename, "a") as f:
        f.write(json.dumps(data_dict) + "\n")

def read_dicts_from_file(filename):
    """Read a json line file as a generator of dictionaries - allowing to load multiple dictionaries as list."""
    with open(filename, "r") as f:
        for line in f:
            yield json.loads(line)
            
def add_cached_column_from_file(df, file_name, merge_on, column):
    """Read a file with cached list of dicts data write it to a dataframe."""

    if Path(file_name).exists():

        cached_answer_correctness = (
            pd.DataFrame(list(read_dicts_from_file(file_name)))
            .drop_duplicates(
                subset=[merge_on],
            )[[column, merge_on]]
            .dropna()
            .reset_index(drop=True)
        )
        return df.merge(
            cached_answer_correctness,
            on=merge_on,
            how="left",
        ).reset_index(drop=True)
    else:
        # return df.insert(0, column, None)

        # Create a copy of the DataFrame
        df_out = df.copy()

        # Add the new column with the name of the variable 'column'
        df_out[column] = None

        # Reorder the columns to place the new column at the end
        columns = list(df_out.columns)
        columns.remove(column)
        columns.append(column)
        df_out = df_out[columns]
        
        # df_out = df.copy().assign(column="")
        # df_out = df_out.reindex(columns=( [col for col in df_out.columns if col not in [column]] + [column] ))
        return df_out

## Connect to database

In [3]:
persistent_client = chromadb.PersistentClient(path=os.path.join(os.getenv('LOCAL_DB_PATH'),'chromadb'))   
query_model=OpenAIEmbeddings(model='text-embedding-ada-002',openai_api_key=os.getenv('OPENAI_API_KEY'))

index_name='chromadb-text-embedding-ada-002-ams-nomerge-2000-2020-400-0'

# Connect to vectorstore where no chunking was done only full PDF pages
vectorstore = Chroma(client=persistent_client,
                        collection_name=index_name,
                        embedding_function=query_model)  

In [4]:
all_docs = vectorstore.get(include=["metadatas", "documents", "embeddings"])

In [5]:
lcdocs = [Document(page_content=doc, metadata=metadata) 
          for doc, metadata in zip(all_docs['documents'], all_docs['metadatas'])]

In [6]:
len(lcdocs)

38377

In [7]:
lcdocs[0]

Document(page_content="locking actually relaxes the usual expectation of adhesive to maintain preload to the common expectation for prevailing torque locking features which is to prevent disassembly.{'source': 'AMS_2016.pdf', 'page': 207, 'start_index': 2696}", metadata={'page': 207, 'source': 'AMS_2016.pdf', 'start_index': 2696})

## Generate synthetic dataset

In [8]:
# Openai
generator_model="gpt-3.5-turbo-16k"
synthetic_generator_llm = ChatOpenAI(model=generator_model, tags=[generator_model])


# Hugging face LLM
# generator_model="google/gemma-7b-it"
# synthetic_generator_llm = ChatOpenAI(base_url='https://api-inference.huggingface.co/v1',
#                             model=generator_model,
#                             api_key=os.getenv('HUGGINGFACEHUB_API_TOKEN'),
#                             tags=generator_model)

# Local LLM via LM studio
# synthetic_generator_llm = ChatOpenAI(base_url="http://localhost:1234/v1", api_key="not-needed")

In [9]:
critic_model="gpt-3.5-turbo-0125"
# critic_model="gpt-4"
synthetic_critic_llm = ChatOpenAI(model=critic_model,tags=[critic_model])

embedding_model="text-embedding-ada-002"
synthetic_embeddings = OpenAIEmbeddings(model=embedding_model,api_key=os.getenv('OPENAI_API_KEY'))

In [10]:
generator = TestsetGenerator.from_langchain(
    synthetic_generator_llm,
    synthetic_critic_llm,
    synthetic_embeddings
)

sample_size=min(len(lcdocs),20) # 500 is the max size before you'll hit rate limits with a tier 3 openai account
percent_total=sample_size/len(lcdocs)
print(percent_total)

# Get a random sample of lcdocs
lcdocs_random = random.sample(lcdocs, sample_size)

0.0005211454777601167


In [12]:
# Check if testset.csv exists
if not os.path.exists('./testset.csv'):
    run_config=RunConfig(timeout=1000,
                    max_retries=50,
                    max_wait=1000,
                    max_workers=1)

    n_questions=10
    testset = generator.generate_with_langchain_docs(lcdocs_random, 
                                                    test_size=n_questions,
                                                    with_debugging_logs=True,
                                                    is_async=False,
                                                    run_config=run_config,
                                                    raise_exceptions=False)
    df_testset=testset.to_pandas()
    df_testset.to_csv('testset.csv', index=False)
else:
    # Import testset.csv into a DataFrame
    df_testset = pd.read_csv('testset.csv')

embedding nodes:   0%|          | 0/40 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/10 [00:00<?, ?it/s]

[ragas.testset.filters.DEBUG] context scoring: {'clarity': 2, 'depth': 2, 'structure': 2, 'relevance': 3, 'score': 2.25}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['Lubricated ball screw', 'Bearing system', 'Gear system', 'Contaminants', 'Operating environment']
[ragas.testset.evolutions.INFO] seed question generated: What types of contaminants could be released from a lubricated gear system into the operating environment?
[ragas.testset.filters.DEBUG] filtered question: {'feedback': 'The question is specific and clear in its intent, asking about the types of contaminants that could be released from a lubricated gear system into the operating environment. It provides a focused inquiry that can be answered based on knowledge of lubricated gear systems and environmental impacts. The question is self-contained and does not require additional context or references to provide a relevant response.', 'verdict': 1}
[ragas.testset.evolutions.DEBUG] answer generated: {'answer':

In [13]:
df_testset

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What is the purpose of the gimbal support stru...,"[forcevectors, prevented thecaging mechanism f...",The purpose of the gimbal support structure in...,simple,"[{'page': 241, 'source': 'AMS_2002.pdf', 'star...",True
1,What materials are used for the flexures in th...,[itanium or other alternative material flexur...,itanium or other alternative material,simple,"[{'page': 26, 'source': 'AMS_2020.pdf', 'start...",True
2,What are the impacts of non-negligible effects...,[non-negligible effects on the hardware. 403{'...,,simple,"[{'page': 419, 'source': 'AMS_2014.pdf', 'star...",True
3,What types of contaminants could be released f...,"[from a lubricat ed ball screw , bearing, or g...",These contaminants could include base oil cons...,simple,"[{'page': 249, 'source': 'AMS_2016.pdf', 'star...",True
4,What are examples of non-linearities and distu...,[The performance analysis has been executed in...,The main non-linearities and disturbances incl...,simple,"[{'page': 589, 'source': 'AMS_2020.pdf', 'star...",True
5,How were typical mass transfer rates achieved ...,[to transfer all but ~1 kg of simulant to the ...,Typical mass transfer rates of 2 - 4 kg/min we...,reasoning,"[{'page': 291, 'source': 'AMS_2010.pdf', 'star...",True
6,What method was used to measure the hexapod's ...,[Devising thistestwith therequired resolution ...,Laser-ranging interferometers were used to mea...,reasoning,"[{'page': 163, 'source': 'AMS_2001.pdf', 'star...",True
7,How do the clearances in flexure pin locations...,[itanium or other alternative material flexur...,The flexures feature clearances at pin locatio...,multi_context,"[{'page': 26, 'source': 'AMS_2020.pdf', 'start...",True
8,How do the clearances at pin locations in the ...,[itanium or other alternative material flexur...,The clearances at pin locations in the flexure...,multi_context,"[{'page': 26, 'source': 'AMS_2020.pdf', 'start...",True
9,What materials are used for the flexures and w...,[itanium or other alternative material flexur...,The materials used for the flexures are itaniu...,multi_context,"[{'page': 26, 'source': 'AMS_2020.pdf', 'start...",True


### Format dataset and database for RAG

In [14]:
df_questions = df_testset[['question', 'ground_truth']].copy()
df_questions['question_by'] = generator_model

In [15]:
df_questions

Unnamed: 0,question,ground_truth,question_by
0,What is the purpose of the gimbal support stru...,The purpose of the gimbal support structure in...,gpt-3.5-turbo-16k
1,What materials are used for the flexures in th...,itanium or other alternative material,gpt-3.5-turbo-16k
2,What are the impacts of non-negligible effects...,,gpt-3.5-turbo-16k
3,What types of contaminants could be released f...,These contaminants could include base oil cons...,gpt-3.5-turbo-16k
4,What are examples of non-linearities and distu...,The main non-linearities and disturbances incl...,gpt-3.5-turbo-16k
5,How were typical mass transfer rates achieved ...,Typical mass transfer rates of 2 - 4 kg/min we...,gpt-3.5-turbo-16k
6,What method was used to measure the hexapod's ...,Laser-ranging interferometers were used to mea...,gpt-3.5-turbo-16k
7,How do the clearances in flexure pin locations...,The flexures feature clearances at pin locatio...,gpt-3.5-turbo-16k
8,How do the clearances at pin locations in the ...,The clearances at pin locations in the flexure...,gpt-3.5-turbo-16k
9,What materials are used for the flexures and w...,The materials used for the flexures are itaniu...,gpt-3.5-turbo-16k


In [16]:
all_docs = vectorstore.get(include=["metadatas", "documents", "embeddings"])
df_docs = pd.DataFrame(
    {
        "id": [_stable_hash_meta(metadata) for metadata in all_docs["metadatas"]],
        "source": [metadata.get("source") for metadata in all_docs["metadatas"]],
        "page": [metadata.get("page", -1) for metadata in all_docs["metadatas"]],
        "document": all_docs["documents"],
        "embedding": all_docs["embeddings"],
    }
)

In [17]:
df_docs.head(10)

Unnamed: 0,id,source,page,document,embedding
0,00016cda86a70cdd189b23eaa7b0b99066998acf,AMS_2016.pdf,207,locking actually relaxes the usual expectation...,"[-0.017197346314787865, 0.015192832797765732, ..."
1,0001a4a31c369c6418707c7e2ce6e6ef75a68adc,AMS_2018.pdf,323,simply reinforce good system and test design ...,"[-0.008877139538526535, 0.012146159075200558, ..."
2,00053953623d054747d255c6cd28c089854ab4b5,AMS_2018.pdf,473,mechanisms test. Electrical pinout verificatio...,"[-0.04263529181480408, -0.006601013708859682, ..."
3,000639108d50106d83560b522954d3a13ab74544,AMS_2012.pdf,137,failure during the qualification process. Futu...,"[-0.022729255259037018, -0.0034949718974530697..."
4,00094decef554ad836fc395d4fc337dbc02bc557,AMS_2008.pdf,360,J-2X EngineAux Engines (8 total) Crew Explorat...,"[0.008925407193601131, 0.013699463568627834, -..."
5,0009a16bdab4f2763980e652e22c9abaa96c1762,AMS_2018.pdf,235,design guidelines exist. This paper aims to q...,"[-0.0049348436295986176, 0.00696054520085454, ..."
6,0009ce47793fda9f668246faef3714734c664cd5,AMS_2006.pdf,186,SDhere “Sphere” is the name given to the seis...,"[-0.0024836051743477583, 0.02880486287176609, ..."
7,000a145ade42f65bed580c0c2b37fe5da4b3495b,AMS_2001.pdf,199,anddelivery Thetoolbitisdesigned suchthatasthe...,"[-0.0421469509601593, -0.00046973858843557537,..."
8,000b24cad1dffa9035d614dcb5f544483cfd6155,AMS_2016.pdf,361,Experiment Configuration{'source': 'AMS_2016.p...,"[-0.0012854544911533594, 0.005970340222120285,..."
9,000b31548439fb9ef6e98d2c98b1b8327b30a84f,AMS_2008.pdf,350,Figure 7 – Schematic of the Proposed MAG Strut...,"[-0.013374422676861286, 0.009189322590827942, ..."


In [18]:
# Load the cached RAG answers and source_documents ids from a file - or create an empty column
df_questions_answers = add_cached_column_from_file(
    df_questions, "rag_response_cache.txt", "question", "answer")

df_questions_answers = add_cached_column_from_file(
    df_questions_answers, "rag_response_cache.txt", "question", "source_documents")

In [19]:
df_questions_answers

Unnamed: 0,question,ground_truth,question_by,answer,source_documents
0,What is the purpose of the gimbal support stru...,The purpose of the gimbal support structure in...,gpt-3.5-turbo-16k,,
1,What materials are used for the flexures in th...,itanium or other alternative material,gpt-3.5-turbo-16k,,
2,What are the impacts of non-negligible effects...,,gpt-3.5-turbo-16k,,
3,What types of contaminants could be released f...,These contaminants could include base oil cons...,gpt-3.5-turbo-16k,,
4,What are examples of non-linearities and distu...,The main non-linearities and disturbances incl...,gpt-3.5-turbo-16k,,
5,How were typical mass transfer rates achieved ...,Typical mass transfer rates of 2 - 4 kg/min we...,gpt-3.5-turbo-16k,,
6,What method was used to measure the hexapod's ...,Laser-ranging interferometers were used to mea...,gpt-3.5-turbo-16k,,
7,How do the clearances in flexure pin locations...,The flexures feature clearances at pin locatio...,gpt-3.5-turbo-16k,,
8,How do the clearances at pin locations in the ...,The clearances at pin locations in the flexure...,gpt-3.5-turbo-16k,,
9,What materials are used for the flexures and w...,The materials used for the flexures are itaniu...,gpt-3.5-turbo-16k,,


# RAG questions/answers (batch mode)

The cell below demonstrates using aerospace_chatbot in batch mode. It requires some basic parameter setup and a QA_model.

In [20]:
# TODO turn this into a function/standalone notebook to show how to use the QA model in batch mode

index_type='ChromaDB'
index_name=index_name
query_model=synthetic_embeddings
llm=synthetic_generator_llm

QA_model_params={'rag_type':'Standard',
                 'k':4,
                 'search_type':'similarity',
                 'local_db_path':os.getenv('LOCAL_DB_PATH')}

for i, row in df_questions_answers.iterrows():
    if row['answer'] is None or pd.isnull(row['answer']) or row['answer']=='':
        print(f"Processing question {i+1}/{len(df_questions_answers)}")

        # Use the QA model to query the documents
        qa_obj=queries.QA_Model(index_type,
                        index_name,
                        query_model,
                        embedding_model,
                        llm,
                        **QA_model_params)
        qa_obj.query_docs(row['question'])
        response=qa_obj.result

        df_questions_answers.loc[df_questions_answers.index[i], "answer"] = response['answer'].content
        # print(response['references'])

        ids=[_stable_hash_meta(source_document.metadata)
            for source_document in response['references']]
        df_questions_answers.loc[df_questions_answers.index[i], "source_documents"] = ', '.join(ids)

        # Save the response to cache file
        response_dict = {
            "question": row['question'],
            "answer": response['answer'].content,
            "source_documents": ids,
        }
        write_dict_to_file(response_dict, "rag_response_cache.json")

Processing question 1/10
Processing question 2/10
Processing question 3/10
Processing question 4/10
Processing question 5/10
Processing question 6/10
Processing question 7/10
Processing question 8/10
Processing question 9/10
Processing question 10/10


In [21]:
# Get the context documents content for each question
source_documents_list = []
for cell in df_questions_answers['source_documents']:
    cell_list = cell.strip('[]').split(', ')
    context=[]
    for cell in cell_list:
        context.append(df_docs[df_docs["id"] == cell]["document"].values[0])
    source_documents_list.append(context)
df_questions_answers["contexts"]=source_documents_list

In [22]:
# Addtionaly get embeddings for questions

if not Path("question_embeddings.pickle").exists():
    question_embeddings = [
        synthetic_embeddings.embed_query(question)
        for question in df_questions_answers["question"]
    ]
    with open("question_embeddings.pickle", "wb") as f:
        pickle.dump(question_embeddings, f)

question_embeddings = pickle.load(open("question_embeddings.pickle", "rb"))
# answer_embeddings = pickle.load(open("answer_embeddings_2040214_1111.pickle", "rb"))
df_questions_answers["embedding"] = question_embeddings

In [23]:
df_questions_answers

Unnamed: 0,question,ground_truth,question_by,answer,source_documents,contexts,embedding
0,What is the purpose of the gimbal support stru...,The purpose of the gimbal support structure in...,gpt-3.5-turbo-16k,The purpose of the gimbal support structure in...,"947c84c5431adbce3a871d176bca3ec95f723abf, 1215...",[representative mass models mounted to the opt...,"[0.008754832155455809, 0.024371196014564625, 0..."
1,What materials are used for the flexures in th...,itanium or other alternative material,gpt-3.5-turbo-16k,The materials used for the flexures in the des...,"5490707d93e38d03b37fea8bd1dada64a1e6189e, 69f7...",[Design Details Refer to Figure 3 during the...,"[0.014498652455123787, 0.022030419940088804, -..."
2,What are the impacts of non-negligible effects...,,gpt-3.5-turbo-16k,"Non-negligible effects on hardware, such as ov...","fecd2d9bd27865fc820ba157f511ef0e37a03ac4, 4e10...",[Lesson 4: Despite training and consideratio...,"[-0.008154248662624602, -0.009435928392396601,..."
3,What types of contaminants could be released f...,These contaminants could include base oil cons...,gpt-3.5-turbo-16k,There are several types of contaminants that c...,"75664ddd813b438468446ada94251504db42bce3, 0da1...","[from a lubricat ed ball screw , bearing, or g...","[0.02643345015009181, 0.011656323849789886, 0...."
4,What are examples of non-linearities and distu...,The main non-linearities and disturbances incl...,gpt-3.5-turbo-16k,"Yes, some examples of non-linearities and dist...","795e12d95d21b0d4cfddb2cdb750327d743c7af3, 97d8...",[The performance analysis has been executed in...,"[-0.023483485645832046, 0.01654089475140578, 0..."
5,How were typical mass transfer rates achieved ...,Typical mass transfer rates of 2 - 4 kg/min we...,gpt-3.5-turbo-16k,Gas flow rates and pressure gradients were con...,"e027ba6cbabe3e3ba91159cf749f233a4f3828d8, 3ecb...",[to transfer all but ~1 kg of simulant to the ...,"[0.008738064615644069, 0.012188894312694242, 0..."
6,What method was used to measure the hexapod's ...,Laser-ranging interferometers were used to mea...,gpt-3.5-turbo-16k,The method used to measure the hexapod's motio...,"568a8f326ff6b7bd9c45bbca6198746091a46288, 3b76...",[Devising thistestwith therequired resolution ...,"[0.010537129150249947, 0.0013499876112134349, ..."
7,How do the clearances in flexure pin locations...,The flexures feature clearances at pin locatio...,gpt-3.5-turbo-16k,"During assembly, the clearances in flexure pin...","9929732a3e9acc38bde033d198c11f5dc07406fe, 6077...",[itanium or other alternative material flexur...,"[0.007914263766843146, 0.0062284451070591185, ..."
8,How do the clearances at pin locations in the ...,The clearances at pin locations in the flexure...,gpt-3.5-turbo-16k,The additional components that enhance the fle...,"9929732a3e9acc38bde033d198c11f5dc07406fe, 6077...",[itanium or other alternative material flexur...,"[0.027239375523945373, 0.01979811033096837, 0...."
9,What materials are used for the flexures and w...,The materials used for the flexures are itaniu...,gpt-3.5-turbo-16k,"Based on the provided sources, different mater...","5490707d93e38d03b37fea8bd1dada64a1e6189e, 69f7...",[Design Details Refer to Figure 3 during the...,"[0.025676989577233144, 0.031699309129512354, -..."


## Ragas eval

In [25]:
df_questions_answers = add_cached_column_from_file(
    df_questions_answers, "ragas_result_cache.txt", "question", "answer_correctness"
)
df_questions_answers

Unnamed: 0,question,ground_truth,question_by,answer,source_documents,contexts,embedding,answer_correctness
0,What is the purpose of the gimbal support stru...,The purpose of the gimbal support structure in...,gpt-3.5-turbo-16k,The purpose of the gimbal support structure in...,"947c84c5431adbce3a871d176bca3ec95f723abf, 1215...",[representative mass models mounted to the opt...,"[0.008754832155455809, 0.024371196014564625, 0...",
1,What materials are used for the flexures in th...,itanium or other alternative material,gpt-3.5-turbo-16k,The materials used for the flexures in the des...,"5490707d93e38d03b37fea8bd1dada64a1e6189e, 69f7...",[Design Details Refer to Figure 3 during the...,"[0.014498652455123787, 0.022030419940088804, -...",
2,What are the impacts of non-negligible effects...,,gpt-3.5-turbo-16k,"Non-negligible effects on hardware, such as ov...","fecd2d9bd27865fc820ba157f511ef0e37a03ac4, 4e10...",[Lesson 4: Despite training and consideratio...,"[-0.008154248662624602, -0.009435928392396601,...",
3,What types of contaminants could be released f...,These contaminants could include base oil cons...,gpt-3.5-turbo-16k,There are several types of contaminants that c...,"75664ddd813b438468446ada94251504db42bce3, 0da1...","[from a lubricat ed ball screw , bearing, or g...","[0.02643345015009181, 0.011656323849789886, 0....",
4,What are examples of non-linearities and distu...,The main non-linearities and disturbances incl...,gpt-3.5-turbo-16k,"Yes, some examples of non-linearities and dist...","795e12d95d21b0d4cfddb2cdb750327d743c7af3, 97d8...",[The performance analysis has been executed in...,"[-0.023483485645832046, 0.01654089475140578, 0...",
5,How were typical mass transfer rates achieved ...,Typical mass transfer rates of 2 - 4 kg/min we...,gpt-3.5-turbo-16k,Gas flow rates and pressure gradients were con...,"e027ba6cbabe3e3ba91159cf749f233a4f3828d8, 3ecb...",[to transfer all but ~1 kg of simulant to the ...,"[0.008738064615644069, 0.012188894312694242, 0...",
6,What method was used to measure the hexapod's ...,Laser-ranging interferometers were used to mea...,gpt-3.5-turbo-16k,The method used to measure the hexapod's motio...,"568a8f326ff6b7bd9c45bbca6198746091a46288, 3b76...",[Devising thistestwith therequired resolution ...,"[0.010537129150249947, 0.0013499876112134349, ...",
7,How do the clearances in flexure pin locations...,The flexures feature clearances at pin locatio...,gpt-3.5-turbo-16k,"During assembly, the clearances in flexure pin...","9929732a3e9acc38bde033d198c11f5dc07406fe, 6077...",[itanium or other alternative material flexur...,"[0.007914263766843146, 0.0062284451070591185, ...",
8,How do the clearances at pin locations in the ...,The clearances at pin locations in the flexure...,gpt-3.5-turbo-16k,The additional components that enhance the fle...,"9929732a3e9acc38bde033d198c11f5dc07406fe, 6077...",[itanium or other alternative material flexur...,"[0.027239375523945373, 0.01979811033096837, 0....",
9,What materials are used for the flexures and w...,The materials used for the flexures are itaniu...,gpt-3.5-turbo-16k,"Based on the provided sources, different mater...","5490707d93e38d03b37fea8bd1dada64a1e6189e, 69f7...",[Design Details Refer to Figure 3 during the...,"[0.025676989577233144, 0.031699309129512354, -...",


In [26]:
# prepare the dataframe for evaluation
df_qa_eval = df_questions_answers.copy()


# adapt the ground truth to the ragas name and format
df_qa_eval.rename(columns={"ground_truth": "ground_truths"}, inplace=True)
df_qa_eval["ground_truths"] = [
    [gt] if not isinstance(gt, list) else gt for gt in df_qa_eval["ground_truths"]
]

In [28]:
# evaluate the answer correctness if not already done
fields = ["question", "answer", "contexts", "ground_truths"]
for i, row in df_qa_eval.iterrows():
    if row["answer_correctness"] is None or pd.isnull(row["answer_correctness"]):
        evaluation_result = evaluate(
            Dataset.from_pandas(df_qa_eval.iloc[i : i + 1][fields]),
            [answer_correctness],
        )
        df_qa_eval.loc[i, "answer_correctness"] = evaluation_result[
            "answer_correctness"
        ]

        # optionally save the response to cache
        response_dict = {
            "question": row["question"],
            "answer_correctness": evaluation_result["answer_correctness"],
        }
        write_dict_to_file(response_dict, "ragas_result_cache.txt")

# write the answer correctness to the original dataframe
df_questions_answers["answer_correctness"] = df_qa_eval["answer_correctness"]



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

In [29]:
# add the infos about questions using each document to the documents dataframe


# Explode 'source_documents' so each document ID is in its own row alongside the question ID
df_questions_exploded = df_qa_eval.explode("source_documents")

# Group by exploded 'source_documents' (document IDs) and aggregate
agg = (
    df_questions_exploded.groupby("source_documents")
    .agg(
        num_questions=("id", "count"),  # Count of questions referencing the document
        question_ids=(
            "id",
            lambda x: list(x),
        ),  # List of question IDs referencing the document
    )
    .reset_index()
    .rename(columns={"source_documents": "id"})
)

# Merge the aggregated information back into df_documents
df_documents_agg = pd.merge(df_docs, agg, on="id", how="left")

# Use apply to replace NaN values with empty lists for 'question_ids'
df_documents_agg["question_ids"] = df_documents_agg["question_ids"].apply(
    lambda x: x if isinstance(x, list) else []
)
# Replace NaN values in 'num_questions' with 0
df_documents_agg["num_questions"] = df_documents_agg["num_questions"].fillna(0)

KeyError: "Column(s) ['id'] do not exist"