# RAG for Question Similarity in RFPs

## Notebook setup

In [1]:
import pandas as pd

In [2]:
%pip install -qU langchain langchain-openai langchain-cohere


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
%pip install -qU qdrant-client lark


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [4]:
import os

import dotenv

# Load variables from a local .env file (if present) into the process environment
dotenv.load_dotenv()

# Fail fast: the OpenAI embedding and chat cells below need this key
if "OPENAI_API_KEY" not in os.environ:
    raise Exception("OPENAI_API_KEY not found")

In [5]:
import textwrap
from IPython.display import HTML, display
from tabulate import tabulate


def _format_cell_text(text, width=50):
    """Private function to format a cell's text."""
    return "\n".join([textwrap.fill(line, width=width) for line in text.split("\n")])


def _format_dataframe_for_tabulate(df):
    """Private function to format the entire DataFrame for tabulation."""
    df_out = df.copy()

    # Format all string columns
    for column in df_out.columns:
        # Check if column is of type object (likely strings)
        if df_out[column].dtype == object:
            df_out[column] = df_out[column].apply(_format_cell_text)
    return df_out


def _dataframe_to_html_table(df):
    """Render a DataFrame as an HTML table string using ``tabulate``.

    The column labels become the header row and every DataFrame row becomes
    one table row; the index is not part of the table.
    """
    return tabulate(
        df.values.tolist(),
        headers=df.columns.tolist(),
        tablefmt="html",
    )


def display_nice(df, num_rows=None):
    """Display a DataFrame in the notebook as an HTML table with wrapped text.

    Parameters:
    - df: DataFrame to show.
    - num_rows: When given, only the first ``num_rows`` rows are displayed;
      ``None`` displays every row.
    """
    subset = df if num_rows is None else df.head(num_rows)
    wrapped = _format_dataframe_for_tabulate(subset)
    display(HTML(_dataframe_to_html_table(wrapped)))

In [6]:
def print_dict_keys(data, indent=0):
    """Print the keys of a (possibly nested) dict, one key per line.

    Keys of a nested dict are printed directly below their parent key,
    indented by 4 more spaces than the parent.

    Parameters:
    - data (dict): Mapping whose keys are printed.
    - indent (int): Number of leading spaces for this nesting level.
    """
    padding = ' ' * indent
    for key, value in data.items():
        print(padding + str(key))
        if not isinstance(value, dict):
            continue
        # Descend into the nested mapping one level deeper
        print_dict_keys(value, indent + 4)

## Data preparation

### Load existing RFPs

In [7]:
# CSV files that hold previously answered RFP questions
existing_rfp_paths = [
    "datasets/rag/rfp_existing_questions_client_2.csv",
]

# Read every file and stack the rows into one DataFrame with a fresh 0..n-1 index
existing_rfp_df = pd.concat(
    [pd.read_csv(path) for path in existing_rfp_paths],
    ignore_index=True,
)

In [8]:
existing_rfp_df

Unnamed: 0,Project_Title,RFP_Question_ID,RFP_Question,RFP_Answer,Area,Last_Accessed_At,Requester,Status
0,AI-Powered Risk Assessment Model Development f...,1,Can you discuss your expertise in creating AI-...,Our company has 15 years of experience in deve...,General,18/12/2022,Bank B,Awarded
1,AI-Powered Risk Assessment Model Development f...,2,How do you keep your AI applications current w...,We maintain a dedicated R&D team focused on in...,General,18/12/2022,Bank B,Awarded
2,AI-Powered Risk Assessment Model Development f...,3,Are your AI applications adaptable to specific...,"Absolutely, customization is a core aspect of ...",General,18/12/2022,Bank B,Awarded
3,AI-Powered Risk Assessment Model Development f...,4,What steps do you undertake to protect user pr...,User privacy and data security are paramount. ...,General,18/12/2022,Bank B,Awarded
4,AI-Powered Risk Assessment Model Development f...,5,What strategies do you employ to design user i...,Our design philosophy centers on simplicity an...,General,18/12/2022,Bank B,Awarded
5,AI-Powered Risk Assessment Model Development f...,6,Explain the support and maintenance services y...,"Post-launch, we offer comprehensive support an...",General,18/12/2022,Bank B,Awarded
6,AI-Powered Risk Assessment Model Development f...,7,How do you evaluate the effectiveness and impa...,Success measurement is tailored to each projec...,General,18/12/2022,Bank B,Awarded
7,AI-Powered Risk Assessment Model Development f...,8,How do you manage ethical concerns in your LLM...,We adhere to ethical AI practices by implement...,Large Language Models,18/12/2022,Bank B,Awarded
8,AI-Powered Risk Assessment Model Development f...,9,"Could you outline how you train your LLMs, inc...",Our LLM training process begins with the metic...,Large Language Models,18/12/2022,Bank B,Awarded
9,AI-Powered Risk Assessment Model Development f...,10,How do you ensure your LLMs continuously learn...,We implement advanced continuous learning mech...,Large Language Models,18/12/2022,Bank B,Awarded


In [9]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# One Document per CSV row, accumulated across all input files
documents = []

for file_path in existing_rfp_paths:
    # "Area" is moved out of the row text and into each document's metadata
    loader = CSVLoader(file_path=file_path, metadata_columns=["Area"])

    # load() returns the list of row-documents for this file
    doc = loader.load()
    documents += doc

When using `CSVLoader`, each document represents a single row and includes its respective contents:

In [10]:
# How many of the loaded row-documents to preview
number_of_documents = 5

# Print each document's full repr (page_content plus metadata)
for i, document in enumerate(documents[:number_of_documents]):
    print(f"Document {i + 1}: {document}")

Document 1: page_content='Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing\nRFP_Question_ID: 1\nRFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?\nRFP_Answer: Our company has 15 years of experience in developing AI-based applications, with a strong portfolio in sectors such as healthcare, finance, and education. For instance, our project MediAI Insight for the healthcare industry demonstrated significant achievements in patient data analysis, resulting in a 30% reduction in diagnostic errors and a 40% improvement in treatment personalization. Our platform has engaged over 200 healthcare facilities, achieving a user satisfaction rate of 95%.\nLast_Accessed_At: 18/12/2022\nRequester: Bank B\nStatus: Awarded' metadata={'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General'}
Document 2: page_content='Project_Title: AI-Powered Risk Assessment

Accessing the page content of each document:

In [11]:
# How many documents to preview
number_of_documents = 2

# Print only the text payload of each document, followed by a blank line
for i, document in enumerate(documents[:number_of_documents]):
    print(f"Page content for document {i + 1}:")
    print(document.page_content)
    print()

Page content for document 1:
Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing
RFP_Question_ID: 1
RFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?
RFP_Answer: Our company has 15 years of experience in developing AI-based applications, with a strong portfolio in sectors such as healthcare, finance, and education. For instance, our project MediAI Insight for the healthcare industry demonstrated significant achievements in patient data analysis, resulting in a 30% reduction in diagnostic errors and a 40% improvement in treatment personalization. Our platform has engaged over 200 healthcare facilities, achieving a user satisfaction rate of 95%.
Last_Accessed_At: 18/12/2022
Requester: Bank B
Status: Awarded

Page content for document 2:
Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing
RFP_Question_ID: 2
RFP_Question: How do you keep your AI appli

Note that when adding metadata, it is appended to the default metadata, which consists of the row number and the source: 

In [12]:
# How many documents to preview
number_of_documents = 5

# Print only the metadata dict of each document
for i, document in enumerate(documents[:number_of_documents]):
    print(f"Metadata for document {i + 1}: {document.metadata}")

Metadata for document 1: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General'}
Metadata for document 2: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 1, 'Area': 'General'}
Metadata for document 3: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 2, 'Area': 'General'}
Metadata for document 4: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 3, 'Area': 'General'}
Metadata for document 5: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 4, 'Area': 'General'}


## Split the documents into chunks

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split each row-document into chunks of at most 500 characters with a
# 10-character overlap. add_start_index stores each chunk's offset within its
# source document under the "start_index" metadata key.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=10,
    add_start_index=True,
)
chunks = text_splitter.split_documents(documents)

Get some general information about the chunks:

In [14]:
print(f"Number of chunks: {len(chunks)}")

Number of chunks: 98


Check the lengths of the largest and smallest chunks, as well as the mean chunk length:

In [15]:
# Summary statistics of chunk size, measured in characters of page_content
max_chunk_length = max(len(chunk.page_content) for chunk in chunks)
min_chunk_length = min(len(chunk.page_content) for chunk in chunks)
mean_chunk_length = sum(len(chunk.page_content) for chunk in chunks) / len(chunks)

print(f"Maximum chunk length: {max_chunk_length}")
print(f"Minimum chunk length: {min_chunk_length}")
print(f"Mean chunk length: {mean_chunk_length}")

Maximum chunk length: 499
Minimum chunk length: 12
Mean chunk length: 267.6020408163265


Plot the distribution of chunk lengths:

In [16]:
import plotly.express as px

# Calculate lengths of each chunk's page_content
chunk_lengths = [len(chunk.page_content) for chunk in chunks]

# Creating a histogram of chunk lengths
fig = px.histogram(chunk_lengths, nbins=50, title="Distribution of Chunk Lengths")
fig.update_layout(
    xaxis_title="Chunk Length",
    yaxis_title="Count",
    bargap=0.2,
    showlegend=False
)

# Add summary statistics as text on the plot. They are derived from
# chunk_lengths here so this cell does not rely on variables from other cells.
summary_text = (
    f"Min: {min(chunk_lengths)}<br>"
    f"Max: {max(chunk_lengths)}<br>"
    f"Mean: {sum(chunk_lengths) / len(chunk_lengths):.1f}"
)
fig.add_annotation(
    text=summary_text,
    # Paper coordinates pin the text to the top-right corner of the plot
    # area, independent of the data range and clear of the bars.
    xref="paper",
    yref="paper",
    x=1,
    y=1,
    xanchor="right",
    yanchor="top",
    align="right",
    showarrow=False,
)

# Show the plot
fig.show()

Inspect the chunks: 

In [17]:
# How many chunks to preview
number_of_chunks = 5

# Slice with number_of_chunks, not with a loop index left over from an
# earlier cell, so the preview size does not depend on execution history.
for index, chunk in enumerate(chunks[:number_of_chunks]):
    print(f"Chunk {index + 1}: {chunk}")

Chunk 1: page_content='Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing\nRFP_Question_ID: 1\nRFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?' metadata={'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General', 'start_index': 0}
Chunk 2: page_content='RFP_Answer: Our company has 15 years of experience in developing AI-based applications, with a strong portfolio in sectors such as healthcare, finance, and education. For instance, our project MediAI Insight for the healthcare industry demonstrated significant achievements in patient data analysis, resulting in a 30% reduction in diagnostic errors and a 40% improvement in treatment personalization. Our platform has engaged over 200 healthcare facilities, achieving a user satisfaction rate of' metadata={'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General',

See the page content of each chunk:

In [18]:
# How many chunks to preview
number_of_chunks = 5

# Note: the loop variable is named `document`, but each item here is a chunk
for i, document in enumerate(chunks[:number_of_chunks]):
    print(f"Page content for chunk {i + 1}:")
    print(document.page_content)
    print()

Page content for chunk 1:
Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing
RFP_Question_ID: 1
RFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?

Page content for chunk 2:
RFP_Answer: Our company has 15 years of experience in developing AI-based applications, with a strong portfolio in sectors such as healthcare, finance, and education. For instance, our project MediAI Insight for the healthcare industry demonstrated significant achievements in patient data analysis, resulting in a 30% reduction in diagnostic errors and a 40% improvement in treatment personalization. Our platform has engaged over 200 healthcare facilities, achieving a user satisfaction rate of

Page content for chunk 3:
rate of 95%.

Page content for chunk 4:
Last_Accessed_At: 18/12/2022
Requester: Bank B
Status: Awarded

Page content for chunk 5:
Project_Title: AI-Powered Risk Assessment Model Development

See the metadata for individual chunks:

In [19]:
# How many chunks to preview
number_of_chunks = 5  

# Chunks carry their source document's metadata plus the "start_index" offset
for i, chunk in enumerate(chunks[:number_of_chunks]):
    print(f"Metadata for chunk {i + 1}: {chunk.metadata}")



Metadata for chunk 1: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General', 'start_index': 0}
Metadata for chunk 2: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General', 'start_index': 234}
Metadata for chunk 3: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General', 'start_index': 723}
Metadata for chunk 4: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General', 'start_index': 736}
Metadata for chunk 5: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 1, 'Area': 'General', 'start_index': 0}


Access the source of each chunk:

In [20]:
# How many chunks to preview
number_of_chunks = 5  

# "source" is the path of the CSV file the chunk was loaded from
for i, chunk in enumerate(chunks[:number_of_chunks]):
    print(f"Source for chunk {i + 1}: {chunk.metadata['source']}")

Source for chunk 1: datasets/rag/rfp_existing_questions_client_2.csv
Source for chunk 2: datasets/rag/rfp_existing_questions_client_2.csv
Source for chunk 3: datasets/rag/rfp_existing_questions_client_2.csv
Source for chunk 4: datasets/rag/rfp_existing_questions_client_2.csv
Source for chunk 5: datasets/rag/rfp_existing_questions_client_2.csv


## Store chunks into a vectorstore

In [21]:
from langchain.vectorstores.chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embedding model used to index the chunks here and, later in the notebook,
# to embed questions, contexts and answers for similarity scoring
embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Embed every chunk and index it in a Chroma vector store.
# NOTE(review): no persist_directory is passed, so the index presumably lives
# only for this kernel session — confirm against the Chroma documentation.
# NOTE(review): the setup cells install qdrant-client but not chromadb —
# confirm that chromadb is available in the target environment.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings_model,
)

## Create evaluation dataset

In [22]:
# Load the evaluation dataset (one row per new RFP question) into a pandas DataFrame

rag_evaluation_df = pd.read_csv("datasets/rag/rag_evaluation_dataset_v1.csv")

# Number of evaluation questions, i.e. the number of rows in the DataFrame
NUM_OF_NEW_RFP_QUESTIONS = len(rag_evaluation_df)

print("Number of New RFP Questions:", NUM_OF_NEW_RFP_QUESTIONS)

Number of New RFP Questions: 23


In [23]:
rag_evaluation_df.info()
rag_evaluation_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   id               23 non-null     int64 
 1   new_rfp          23 non-null     object
 2   new_question     23 non-null     object
 3   question_to_llm  23 non-null     object
 4   answer           23 non-null     object
 5   ground_truth     23 non-null     object
 6   existing_rfp     23 non-null     object
dtypes: int64(1), object(6)
memory usage: 1.4+ KB


Unnamed: 0,id,new_rfp,new_question,question_to_llm,answer,ground_truth,existing_rfp
0,1,rfp_new_questions_client_100.csv,What is your experience in developing AI-based...,"What is the most similar question to: ""What is...",,Can you discuss your expertise in creating AI-...,rfp_exisiting_questions_client_2.csv
1,2,rfp_new_questions_client_100.csv,How do you ensure your AI-based apps remain up...,"What is the most similar question to: ""How do ...",,How do you keep your AI applications current w...,rfp_exisiting_questions_client_2.csv
2,3,rfp_new_questions_client_100.csv,Can your AI-based applications be customized t...,"What is the most similar question to: ""Can you...",,Are your AI applications adaptable to specific...,rfp_exisiting_questions_client_2.csv
3,4,rfp_new_questions_client_100.csv,What measures do you take to ensure user priva...,"What is the most similar question to: ""What me...",,What steps do you undertake to protect user pr...,rfp_exisiting_questions_client_2.csv
4,5,rfp_new_questions_client_100.csv,How do you approach user interface and experie...,"What is the most similar question to: ""How do ...",,What strategies do you employ to design user i...,rfp_exisiting_questions_client_2.csv


In [24]:
from langchain_openai import ChatOpenAI

# Chat model that generates the answers; temperature=0.0 minimises sampling randomness
llm = ChatOpenAI(model="gpt-4-turbo", temperature=0.0)
# Retriever over the vector store that returns the k=5 best-matching chunks per query
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

In [25]:
from langchain.prompts import ChatPromptTemplate

template = """Answer the question based only on the following context. 
If you cannot answer the question with the context, please respond with 'I don't know':

### CONTEXT
{context}

### QUESTION
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [26]:
from operator import itemgetter

from langchain_openai import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

# How data flows through the chain (input: {"question": <str>}):
# 1. The first dict builds {"context", "question"}: "question" is passed through
#    unchanged and "context" is the retriever's result for that question.
# 2. RunnablePassthrough.assign(context=...) re-assigns "context" to itself, so
#    the data is unchanged. It still matters structurally: it sits between two
#    plain dict literals, and `dict | dict` on its own is Python's dict merge,
#    not chain composition.
# 3. The last dict produces the final output: "answer" is the LLM's reply to the
#    formatted prompt (an AIMessage, since no output parser is applied) and
#    "context" carries the retrieved documents through for inspection.

# NOTE(review): `llm` is already created in an earlier cell with the same model
# and temperature; this re-creation looks redundant — confirm before removing.
llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)

rag_chain = (
    
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"answer": prompt | llm, "context": itemgetter("context")}

)

Ask a question to test the chain:

In [27]:
question = "Find a similar question as this one: 'What is your experience in developing AI-based applications?'"
response = rag_chain.invoke({"question" : question})
print(response)

{'answer': AIMessage(content='RFP_Question_ID: 1\nRFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?', response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 589, 'total_tokens': 621}, 'model_name': 'gpt-4-turbo', 'system_fingerprint': 'fp_76f018034d', 'finish_reason': 'stop', 'logprobs': None}, id='run-bc067503-662c-4efd-b3a5-a9be2309ad69-0'), 'context': [Document(page_content='Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing\nRFP_Question_ID: 1\nRFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?', metadata={'Area': 'General', 'row': 0, 'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'start_index': 0}), Document(page_content='Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing\nRFP_Question_ID: 2\nRFP_Question: How do you keep

As defined in the last step of `rag_chain`, the RAG response includes two fields: `answer` and `context`:

In [28]:
print_dict_keys(response)

answer
context


Inspecting the answer, we see that the `rag_chain` is functioning correctly and identifies the most similar question in the `vectorstore`:

In [29]:
print(f"Question:")
print(question)
print()
print(f"Answer:")
print(response["answer"].content)

Question:
Find a similar question as this one: 'What is your experience in developing AI-based applications?'

Answer:
RFP_Question_ID: 1
RFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?


Next, we inspect the `context` that was retrieved for the `question`. The context should contain the `k` chunks most relevant to the question. Remember that we set `k` in the `retriever` earlier. These `k` chunks are pasted into the prompt as text, so the LLM generates its answer from content that is close to the question in the embedding space.

In [30]:
number_of_chunks = 5  

for i, chunk in enumerate(response["context"][:number_of_chunks]):
    print(f"Content for chunk {i + 1}:")  # i + 1 to start counting from 1 instead of 0
    print(chunk.page_content)
    print()

Content for chunk 1:
Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing
RFP_Question_ID: 1
RFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?

Content for chunk 2:
Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing
RFP_Question_ID: 2
RFP_Question: How do you keep your AI applications current with ongoing advancements in artificial intelligence?

Content for chunk 3:
RFP_Answer: Our company has 15 years of experience in developing AI-based applications, with a strong portfolio in sectors such as healthcare, finance, and education. For instance, our project MediAI Insight for the healthcare industry demonstrated significant achievements in patient data analysis, resulting in a 30% reduction in diagnostic errors and a 40% improvement in treatment personalization. Our platform has engaged over 200 healthcare facilities, achieving a user satisfaction 

We now inspect the `response_metadata` object to understand its contents and identify what could be useful to incorporate in our RAG evaluation dataset:

In [31]:
print(response["answer"].response_metadata)

{'token_usage': {'completion_tokens': 32, 'prompt_tokens': 589, 'total_tokens': 621}, 'model_name': 'gpt-4-turbo', 'system_fingerprint': 'fp_76f018034d', 'finish_reason': 'stop', 'logprobs': None}


In [32]:
print_dict_keys(response["answer"].response_metadata)

token_usage
    completion_tokens
    prompt_tokens
    total_tokens
model_name
system_fingerprint
finish_reason
logprobs


Extracting the LLM used:

In [33]:
print(f"Model: {response['answer'].response_metadata['model_name']}")

Model: gpt-4-turbo


As we showed earlier, we can also extract some token usage statistics that can help us understand and optimize our interactions with the language model for cost-effectiveness and efficiency.

- **Prompt tokens**: tokens that form the input text sent to the language model. This includes all the text provided to the LLM to generate a response.
- **Completion tokens**: number of tokens in the generated text or output from the model.
- **Total tokens**: total number of tokens processed by the model. It is the sum of both `prompt_tokens` and `completion_tokens`. 

In [34]:
print(f"Completion tokens: {response['answer'].response_metadata['token_usage']['completion_tokens']}")
print(f"Prompt tokens: {response['answer'].response_metadata['token_usage']['prompt_tokens']}")
print(f"Total tokens: {response['answer'].response_metadata['token_usage']['total_tokens']}")

Completion tokens: 32
Prompt tokens: 589
Total tokens: 621


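We will now expand our evaluation dataset to capture the data produced by the RAG pipeline, which will be used later when validating it. We will add the following columns to our dataframe: `context`; the embeddings of the question, answer and context (`question_embeddings`, `answer_embeddings`, `context_embeddings`); the three pairwise similarity scores between them; `model`; `completion_tokens`, `prompt_tokens` and `total_tokens`; and `response_time`.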

In [35]:
# Add the result columns, initialised to empty strings; the evaluation loop
# fills them in row by row.
for new_column in (
    # Retrieved context pasted into the prompt
    'context',
    # Embedding vectors
    'question_embeddings',
    'answer_embeddings',
    'context_embeddings',
    # Pairwise cosine similarities between the embeddings
    'similarity_score_question_vs_context',
    'similarity_score_question_vs_answer',
    'similarity_score_context_vs_answer',
    # LLM metadata
    'model',
    'completion_tokens',
    'prompt_tokens',
    'total_tokens',
    # Duration of the chain invocation
    'response_time',
):
    rag_evaluation_df[new_column] = ''

We would also like to compute a few similarity metrics between embeddings, such as cosine similarity or Euclidean distance:

In [36]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def cosine_similarity_score(embedding1, embedding2):
    """
    Return the cosine similarity of two embedding vectors.

    Parameters:
    - embedding1 (array-like): First embedding vector.
    - embedding2 (array-like): Second embedding vector.

    Returns:
    - float: Cosine similarity between the two embeddings.

    Note: Cosine similarity is symmetric, so swapping the arguments gives the same score.
    """
    # sklearn's cosine_similarity expects 2D input: treat each embedding as a one-row matrix
    row1, row2 = (
        np.array(embedding).reshape(1, -1)
        for embedding in (embedding1, embedding2)
    )

    # The result is a 1x1 matrix; unwrap its only entry
    similarity_matrix = cosine_similarity(row1, row2)
    return similarity_matrix[0][0]

In [37]:
import numpy as np

def euclidean_distance(embedding1, embedding2):
    """
    Return the Euclidean (L2) distance between two embedding vectors.

    Parameters:
    - embedding1 (array-like): First embedding vector.
    - embedding2 (array-like): Second embedding vector.

    Returns:
    - float: Euclidean distance between the two embeddings.
    """
    # np.array accepts lists and arrays alike, so both inputs support subtraction
    difference = np.array(embedding1) - np.array(embedding2)

    # The norm of the difference vector is the distance
    return np.linalg.norm(difference)

In [38]:
import time

# Number of questions to process by the RAG model
number_of_rows_to_process = NUM_OF_NEW_RFP_QUESTIONS

# Depending on the pandas version, read_csv may load the "None" placeholder as
# a missing value (NaN). Casting to object lets string answers be written back
# even when the whole column was read as NaN (float64).
rag_evaluation_df["answer"] = rag_evaluation_df["answer"].astype(object)

for i, (index, row) in enumerate(rag_evaluation_df.iloc[:number_of_rows_to_process].iterrows()):
    print(f"Processing row {i}...")

    # An answer is still pending when it is the string 'None' or a missing value
    if pd.isna(row["answer"]) or row["answer"] == "None":
        print(f"Answer is 'None' for question ID {index}. Invoking RAG model...")

        # perf_counter is a monotonic clock, so the measured duration cannot be
        # distorted by system clock adjustments
        start_time = time.perf_counter()

        # Invoke the RAG model with the question from the current row
        response = rag_chain.invoke({"question": row["question_to_llm"]})

        elapsed_seconds = time.perf_counter() - start_time

        # Store the response time, rounded to one decimal
        rag_evaluation_df.at[index, 'response_time'] = round(elapsed_seconds, 1)

        # Store whatever response comes from the LLM
        answer_message = response["answer"]
        answer_text = answer_message.content
        rag_evaluation_df.at[index, "answer"] = answer_text
        print(f"Question ID {index} answer updated with the response from the RAG model.")

        # Store the context included in the prompt
        context = "\n\n".join(chunk.page_content for chunk in response["context"])
        rag_evaluation_df.at[index, "context"] = context

        # Compute and store embeddings for the question, context and answer
        print("Computing embeddings for the question...")
        question_embeddings = np.array(embeddings_model.embed_query(row["question_to_llm"]))
        rag_evaluation_df.at[index, 'question_embeddings'] = question_embeddings

        print("Computing embeddings for the context...")
        context_embeddings = np.array(embeddings_model.embed_query(context))
        rag_evaluation_df.at[index, 'context_embeddings'] = context_embeddings

        print("Computing embeddings for the answer...")
        answer_embeddings = np.array(embeddings_model.embed_query(answer_text))
        rag_evaluation_df.at[index, 'answer_embeddings'] = answer_embeddings

        # Compute similarity measures between embeddings
        print("Computing cosine similarity between question and context...")
        rag_evaluation_df.at[index, 'similarity_score_question_vs_context'] = cosine_similarity_score(question_embeddings, context_embeddings)

        print("Computing cosine similarity between question and answer...")
        rag_evaluation_df.at[index, 'similarity_score_question_vs_answer'] = cosine_similarity_score(question_embeddings, answer_embeddings)

        print("Computing cosine similarity between context and answer...")
        rag_evaluation_df.at[index, 'similarity_score_context_vs_answer'] = cosine_similarity_score(context_embeddings, answer_embeddings)

        # Store some metadata such as model name and tokens statistics
        response_metadata = answer_message.response_metadata
        token_usage = response_metadata["token_usage"]
        rag_evaluation_df.at[index, "model"] = response_metadata["model_name"]
        rag_evaluation_df.at[index, "completion_tokens"] = token_usage["completion_tokens"]
        rag_evaluation_df.at[index, "prompt_tokens"] = token_usage["prompt_tokens"]
        rag_evaluation_df.at[index, "total_tokens"] = token_usage["total_tokens"]

print("Processing complete.")

Processing row 0...
Answer is 'None' for question ID 0. Invoking RAG model...
Question ID 0 answer updated with the response from the RAG model.
Computing embeddings for the question...
Computing embeddings for the context...
Computing embeddings for the answer...
Computing cosine similarity between question and context...
Computing cosine similarity between question and answer...
Computing cosine similarity between context and answer...
Processing row 1...
Answer is 'None' for question ID 1. Invoking RAG model...
Question ID 1 answer updated with the response from the RAG model.
Computing embeddings for the question...
Computing embeddings for the context...
Computing embeddings for the answer...
Computing cosine similarity between question and context...
Computing cosine similarity between question and answer...
Computing cosine similarity between context and answer...
Processing row 2...
Answer is 'None' for question ID 2. Invoking RAG model...
Question ID 2 answer updated with the 

First, check if all responses have been generated by the RAG pipeline or if there are any `None` values in the answers column. If there are any rows with `None` answers, remove these before they are passed to the RAGAS metrics.

In [39]:
# Keep only rows that received an answer: drop both the 'None' placeholder and
# missing values (depending on the pandas version, read_csv may load the
# placeholder as NaN). .copy() makes the result an independent DataFrame, so
# later column assignments do not trigger SettingWithCopyWarning.
has_answer = rag_evaluation_df['answer'].notna() & (rag_evaluation_df['answer'] != 'None')
rag_evaluation_df = rag_evaluation_df[has_answer].copy()
rag_evaluation_df

Unnamed: 0,id,new_rfp,new_question,question_to_llm,answer,ground_truth,existing_rfp,context,question_embeddings,answer_embeddings,context_embeddings,similarity_score_question_vs_context,similarity_score_question_vs_answer,similarity_score_context_vs_answer,model,completion_tokens,prompt_tokens,total_tokens,response_time
0,1,rfp_new_questions_client_100.csv,What is your experience in developing AI-based...,"What is the most similar question to: ""What is...",RFP_Question: Can you discuss your expertise i...,Can you discuss your expertise in creating AI-...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.012242779808196847, -0.02875495641236209, ...","[0.028609594368424196, -0.024913389269028082, ...","[0.0096687535303967, -0.009518850642010203, 0....",0.627775,0.733246,0.770459,gpt-4-turbo,23,550,573,1.9
1,2,rfp_new_questions_client_100.csv,How do you ensure your AI-based apps remain up...,"What is the most similar question to: ""How do ...",RFP_Question: How do you keep your AI applicat...,How do you keep your AI applications current w...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.021238304961058666, -0.002903160656478289,...","[0.009174146107929795, 0.004682353866566277, 0...","[-0.00010336780165633408, 0.017887354504405963...",0.575001,0.783792,0.692069,gpt-4-turbo,20,541,561,3.1
2,3,rfp_new_questions_client_100.csv,Can your AI-based applications be customized t...,"What is the most similar question to: ""Can you...","The most similar question to ""Can your AI-base...",Are your AI applications adaptable to specific...,rfp_exisiting_questions_client_2.csv,"RFP_Answer: Absolutely, customization is a cor...","[-0.024933725810230647, -0.00398689651570446, ...","[-0.018114261370701053, -0.003040488117825552,...","[-0.0010771675571721216, -0.009237417812190684...",0.642971,0.933371,0.650528,gpt-4-turbo,42,532,574,3.2
3,4,rfp_new_questions_client_100.csv,What measures do you take to ensure user priva...,"What is the most similar question to: ""What me...","The most similar question to ""What measures do...",What steps do you undertake to protect user pr...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.011542554119771953, -0.012979928523676116,...","[-0.017716543891049378, -0.021140465682403832,...","[0.018035042880599816, 0.013365664407925769, 0...",0.643153,0.932541,0.645517,gpt-4-turbo,49,608,657,2.7
4,5,rfp_new_questions_client_100.csv,How do you approach user interface and experie...,"What is the most similar question to: ""How do ...",RFP_Question: What strategies do you employ to...,What strategies do you employ to design user i...,rfp_exisiting_questions_client_2.csv,RFP_Answer: Our design philosophy centers on s...,"[-0.022403337192397944, -0.003745948452053383,...","[0.012766196768228667, 0.02172657166295028, 0....","[0.00669202890762761, 0.01376124531146158, 0.0...",0.580555,0.790653,0.724465,gpt-4-turbo,26,594,620,4.3
5,6,rfp_new_questions_client_100.csv,Describe your support and maintenance services...,"What is the most similar question to: ""Describ...","The most similar question to ""Describe your su...",Explain the support and maintenance services y...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.02594255529391264, 0.026052017363914444, 0...","[-0.018622273370422313, 0.016507519223956362, ...","[-0.0008700734038692212, 0.026400598223398738,...",0.653195,0.924453,0.674123,gpt-4-turbo,44,587,631,2.9
6,7,rfp_new_questions_client_100.csv,How do you measure the success and impact of y...,"What is the most similar question to: ""How do ...",RFP_Question: How do you evaluate the effectiv...,How do you evaluate the effectiveness and impa...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.005094874094156965, -0.0018191395470256865...","[0.01474235542817176, 0.004943857159210323, 0....","[0.0021657988083164455, 0.00496627439696867, 0...",0.629255,0.785387,0.75012,gpt-4-turbo,22,537,559,2.1
7,8,rfp_new_questions_client_100.csv,How do you ensure the ethical use of LLMs in y...,"What is the most similar question to: ""How do ...",RFP_Question: How do you manage ethical concer...,How do you manage ethical concerns in your LLM...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[0.021543276731008216, 0.01490634165937239, 0....","[0.02458965545433629, 0.017371860934522247, 0....","[0.011270932737455297, 0.019519289755009234, 0...",0.694476,0.835385,0.741921,gpt-4-turbo,28,607,635,2.6
8,9,rfp_new_questions_client_100.csv,Can you describe the process of training your ...,"What is the most similar question to: ""Can you...","The most similar question to ""Can you describe...","Could you outline how you train your LLMs, inc...",rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.02396939689706394, 0.018455669672488594, 0...","[-0.018115209323870087, 0.023699326320744635, ...","[0.002109897776288197, 0.01695080650381053, 0....",0.628883,0.946127,0.656781,gpt-4-turbo,61,605,666,2.9
9,10,rfp_new_questions_client_100.csv,How do you handle the continuous learning and ...,"What is the most similar question to: ""How do ...",RFP_Question: How do you ensure your LLMs cont...,How do you ensure your LLMs continuously learn...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.00825047894972602, 0.021084556591604547, 0...","[-0.013420040554404636, 0.028803989073751236, ...","[0.009807711474561132, 0.015744742017965888, 0...",0.622149,0.766513,0.633393,gpt-4-turbo,26,633,659,1.8


We save the results in a CSV file for convenience to avoid having to execute the entire RAG pipeline every time we want to test our RAG evaluation metrics:

In [40]:
# Persist the RAG pipeline outputs (without the index column) so the evaluation
# cells below can be re-run without executing the whole pipeline again
rag_evaluation_df.to_csv(path_or_buf="rag_evaluation.csv", index=False)

We now proceed to evaluate our RAG pipeline using RAGAS metrics from the `ragas` package. The `evaluate()` function expects a Dataset with specific column names: `question`, `contexts`, `ground_truth`, and `answer`. Our DataFrame already has `answer` and `ground_truth`, so we rename `question_to_llm` to `question` and `context` to `contexts`, and wrap each context string in a list so that every `contexts` entry is a list of strings.

In [41]:
# Build the RAGAS input frame from the RAG outputs without touching `rag_evaluation_df`:
#   1. rename our columns to the names RAGAS expects
#   2. wrap every context string in a one-element list
ragas_results_df = (
    rag_evaluation_df
    .rename(columns={"question_to_llm": "question", "context": "contexts"})
    .assign(contexts=lambda frame: frame["contexts"].map(lambda context: [context]))
)

# Summarise the resulting columns and non-null counts
ragas_results_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 23 entries, 0 to 22
Data columns (total 19 columns):
 #   Column                                Non-Null Count  Dtype 
---  ------                                --------------  ----- 
 0   id                                    23 non-null     int64 
 1   new_rfp                               23 non-null     object
 2   new_question                          23 non-null     object
 3   question                              23 non-null     object
 4   answer                                23 non-null     object
 5   ground_truth                          23 non-null     object
 6   existing_rfp                          23 non-null     object
 7   contexts                              23 non-null     object
 8   question_embeddings                   23 non-null     object
 9   answer_embeddings                     23 non-null     object
 10  context_embeddings                    23 non-null     object
 11  similarity_score_question_vs_conte

In [42]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    context_relevancy,
    answer_correctness,
    answer_similarity
)

from ragas import evaluate

def evaluate_ragas_dataset(ragas_dataset):
    """Run the notebook's seven RAGAS metrics over ``ragas_dataset``.

    Args:
        ragas_dataset: Dataset passed unchanged to ``evaluate``.

    Returns:
        The object returned by ``evaluate`` for this dataset and metric list.
    """
    selected_metrics = [
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
        context_relevancy,
        answer_correctness,
        answer_similarity,
    ]
    return evaluate(ragas_dataset, metrics=selected_metrics)

Now, we apply the RAGAS evaluation metrics row by row, adding the results to corresponding columns for each metric in our evaluation dataset. We first initialize the columns where the evaluation metrics will be stored:

In [43]:
# Pre-create one column per RAGAS metric.
# The columns start as NaN rather than '' so they are numeric from the outset and
# any row that never gets evaluated shows up as missing instead of an empty string.
for metric_column in (
    "context_precision",
    "faithfulness",
    "answer_relevancy",
    "context_recall",
    "context_relevancy",
    "answer_correctness",
    "answer_similarity",
):
    ragas_results_df[metric_column] = float("nan")


In [44]:
from datasets import Dataset

# Columns handed to RAGAS for every evaluated row
required_fields = ["question", "answer", "contexts", "ground_truth"]

# Result column name -> RAGAS metric object imported from ragas.metrics
metrics_functions = dict(
    context_precision=context_precision,
    faithfulness=faithfulness,
    answer_relevancy=answer_relevancy,
    context_recall=context_recall,
    context_relevancy=context_relevancy,
    answer_correctness=answer_correctness,
    answer_similarity=answer_similarity,
)

# Metric names, in the order they are evaluated and reported
metrics = list(metrics_functions)

# Never process more rows than exist, and never more than NUM_OF_NEW_RFP_QUESTIONS
number_of_rows_to_process = min(len(ragas_results_df), NUM_OF_NEW_RFP_QUESTIONS)

In [45]:

# Score the first `number_of_rows_to_process` rows one at a time with RAGAS and store
# each row's metric values in the matching columns of `ragas_results_df`.

# The metric list is the same for every row, so build it once outside the loop
selected_metrics = [metrics_functions[metric] for metric in metrics if metric in metrics_functions]

# Walk the index of `ragas_results_df` itself: `.at[]` below is label-based, and the labels
# only equal the positions 0..n-1 when no rows were dropped upstream.
for i, index in enumerate(ragas_results_df.index[:number_of_rows_to_process]):
    print(f"Processing RFP question {i+1}...")

    # Create a temporary single-row Dataset with only the fields RAGAS needs
    ragas_dataset = Dataset.from_pandas(ragas_results_df.iloc[i: i + 1][required_fields])

    # Evaluate using RAGAS metrics
    evaluation_result = evaluate(ragas_dataset, selected_metrics)
    print("Evaluation completed.")

    # Store evaluation results back into the row they were computed from
    for metric in metrics:
        if metric in evaluation_result:
            ragas_results_df.at[index, metric] = evaluation_result[metric]
            print(f"{metric}: {evaluation_result[metric]}")

print("All RFP questions processed.")


Processing RFP question 1...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.8990139835745649
context_recall: 1.0
context_relevancy: 0.0
answer_correctness: 0.9888756135739206
answer_similarity: 0.9557178312698792
Processing RFP question 2...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.8945532209190032
context_recall: 1.0
context_relevancy: 0.0
answer_correctness: 0.9876390580059653
answer_similarity: 0.9505562320238611
Processing RFP question 3...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9001553151297851
context_recall: 1.0
context_relevancy: 0.0
answer_correctness: 0.9797074043800069
answer_similarity: 0.9188296175200277
Processing RFP question 4...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9258427866932077
context_recall: 0.45454545454545453
context_relevancy: 0.5882352941176471
answer_correctness: 0.9801575821857698
answer_similarity: 0.9206303287430789
Processing RFP question 5...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.8977863192635689
context_recall: 0.0
context_relevancy: 0.5333333333333333
answer_correctness: 0.9896201612910706
answer_similarity: 0.9584806451642822
Processing RFP question 6...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.8928452027275666
context_recall: 0.5
context_relevancy: 0.4444444444444444
answer_correctness: 0.9778436048212342
answer_similarity: 0.9113744192849368
Processing RFP question 7...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9062997671753094
context_recall: 1.0
context_relevancy: 0.0
answer_correctness: 0.7390270956049935
answer_similarity: 0.9561083824199739
Processing RFP question 8...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.921053957702389
context_recall: 1.0
context_relevancy: 0.0
answer_correctness: 0.991748924081385
answer_similarity: 0.96699569632554
Processing RFP question 9...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.8989112186112126
context_recall: 0.25
context_relevancy: 0.0
answer_correctness: 0.9788167699919101
answer_similarity: 0.9152670799676406
Processing RFP question 10...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9047461201964038
context_recall: 0.2
context_relevancy: 0.4375
answer_correctness: 0.9901505791987055
answer_similarity: 0.9606023167948219
Processing RFP question 11...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9009129737018281
context_recall: 0.5
context_relevancy: 0.0
answer_correctness: 0.990606535163346
answer_similarity: 0.9624261406533836
Processing RFP question 12...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.8703311117110536
context_recall: 0.3333333333333333
context_relevancy: 0.0
answer_correctness: 0.9772987894564352
answer_similarity: 0.9091951578257409
Processing RFP question 13...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9195716533335515
context_recall: 1.0
context_relevancy: 0.0
answer_correctness: 0.7418784707014128
answer_similarity: 0.9675138828056512
Processing RFP question 14...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9592136009612591
context_recall: 0.0
context_relevancy: 0.0
answer_correctness: 0.9769491898867654
answer_similarity: 0.9077967595470615
Processing RFP question 15...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9237728594260185
context_recall: 0.75
context_relevancy: 0.25
answer_correctness: 0.9823503920069366
answer_similarity: 0.9294015680277465
Processing RFP question 16...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9201136245714331
context_recall: 1.0
context_relevancy: 0.1875
answer_correctness: 0.7431887857974869
answer_similarity: 0.9727551431899477
Processing RFP question 17...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9197367877165871
context_recall: 0.0
context_relevancy: 0.0
answer_correctness: 0.744119942847126
answer_similarity: 0.976497045376926
Processing RFP question 18...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Failed to parse output. Returning None.

Mean of empty slice



Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: nan
context_recall: 1.0
context_relevancy: 0.0
answer_correctness: 0.9945006543187214
answer_similarity: 0.978154318501003
Processing RFP question 19...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9157755867815385
context_recall: 0.6666666666666666
context_relevancy: 0.125
answer_correctness: 0.992488980449869
answer_similarity: 0.9699434582967869
Processing RFP question 20...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9346616923950372
context_recall: 1.0
context_relevancy: 0.0
answer_correctness: 0.9947663539759243
answer_similarity: 0.9790654159036972
Processing RFP question 21...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9209448658921863
context_recall: 0.0
context_relevancy: 0.25
answer_correctness: 0.9937525132219007
answer_similarity: 0.9750100528876029
Processing RFP question 22...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9223195752001881
context_recall: 0.0
context_relevancy: 0.16666666666666666
answer_correctness: 0.9893490835768213
answer_similarity: 0.9573963343072849
Processing RFP question 23...


Evaluating:   0%|          | 0/7 [00:00<?, ?it/s]

Evaluation completed.
context_precision: 0.9999999999
faithfulness: 1.0
answer_relevancy: 0.9188091539259172
context_recall: 1.0
context_relevancy: 0.3333333333333333
answer_correctness: 0.9802832802750678
answer_similarity: 0.9211666299826714
All RFP questions processed.


In [46]:
# Inspect the column list and non-null counts after the evaluation loop
ragas_results_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 23 entries, 0 to 22
Data columns (total 26 columns):
 #   Column                                Non-Null Count  Dtype 
---  ------                                --------------  ----- 
 0   id                                    23 non-null     int64 
 1   new_rfp                               23 non-null     object
 2   new_question                          23 non-null     object
 3   question                              23 non-null     object
 4   answer                                23 non-null     object
 5   ground_truth                          23 non-null     object
 6   existing_rfp                          23 non-null     object
 7   contexts                              23 non-null     object
 8   question_embeddings                   23 non-null     object
 9   answer_embeddings                     23 non-null     object
 10  context_embeddings                    23 non-null     object
 11  similarity_score_question_vs_conte

In [48]:
# Display the evaluation frame; the per-row RAGAS scores are the last seven columns
ragas_results_df

Unnamed: 0,id,new_rfp,new_question,question,answer,ground_truth,existing_rfp,contexts,question_embeddings,answer_embeddings,...,prompt_tokens,total_tokens,response_time,context_precision,faithfulness,answer_relevancy,context_recall,context_relevancy,answer_correctness,answer_similarity
0,1,rfp_new_questions_client_100.csv,What is your experience in developing AI-based...,"What is the most similar question to: ""What is...",RFP_Question: Can you discuss your expertise i...,Can you discuss your expertise in creating AI-...,rfp_exisiting_questions_client_2.csv,[Project_Title: AI-Powered Risk Assessment Mod...,"[-0.012242779808196847, -0.02875495641236209, ...","[0.028609594368424196, -0.024913389269028082, ...",...,550,573,1.9,1.0,1.0,0.899014,1.0,0.0,0.988876,0.955718
1,2,rfp_new_questions_client_100.csv,How do you ensure your AI-based apps remain up...,"What is the most similar question to: ""How do ...",RFP_Question: How do you keep your AI applicat...,How do you keep your AI applications current w...,rfp_exisiting_questions_client_2.csv,[Project_Title: AI-Powered Risk Assessment Mod...,"[-0.021238304961058666, -0.002903160656478289,...","[0.009174146107929795, 0.004682353866566277, 0...",...,541,561,3.1,1.0,1.0,0.894553,1.0,0.0,0.987639,0.950556
2,3,rfp_new_questions_client_100.csv,Can your AI-based applications be customized t...,"What is the most similar question to: ""Can you...","The most similar question to ""Can your AI-base...",Are your AI applications adaptable to specific...,rfp_exisiting_questions_client_2.csv,"[RFP_Answer: Absolutely, customization is a co...","[-0.024933725810230647, -0.00398689651570446, ...","[-0.018114261370701053, -0.003040488117825552,...",...,532,574,3.2,1.0,1.0,0.900155,1.0,0.0,0.979707,0.91883
3,4,rfp_new_questions_client_100.csv,What measures do you take to ensure user priva...,"What is the most similar question to: ""What me...","The most similar question to ""What measures do...",What steps do you undertake to protect user pr...,rfp_exisiting_questions_client_2.csv,[Project_Title: AI-Powered Risk Assessment Mod...,"[-0.011542554119771953, -0.012979928523676116,...","[-0.017716543891049378, -0.021140465682403832,...",...,608,657,2.7,1.0,1.0,0.925843,0.454545,0.588235,0.980158,0.92063
4,5,rfp_new_questions_client_100.csv,How do you approach user interface and experie...,"What is the most similar question to: ""How do ...",RFP_Question: What strategies do you employ to...,What strategies do you employ to design user i...,rfp_exisiting_questions_client_2.csv,[RFP_Answer: Our design philosophy centers on ...,"[-0.022403337192397944, -0.003745948452053383,...","[0.012766196768228667, 0.02172657166295028, 0....",...,594,620,4.3,1.0,1.0,0.897786,0.0,0.533333,0.98962,0.958481
5,6,rfp_new_questions_client_100.csv,Describe your support and maintenance services...,"What is the most similar question to: ""Describ...","The most similar question to ""Describe your su...",Explain the support and maintenance services y...,rfp_exisiting_questions_client_2.csv,[Project_Title: AI-Powered Risk Assessment Mod...,"[-0.02594255529391264, 0.026052017363914444, 0...","[-0.018622273370422313, 0.016507519223956362, ...",...,587,631,2.9,1.0,1.0,0.892845,0.5,0.444444,0.977844,0.911374
6,7,rfp_new_questions_client_100.csv,How do you measure the success and impact of y...,"What is the most similar question to: ""How do ...",RFP_Question: How do you evaluate the effectiv...,How do you evaluate the effectiveness and impa...,rfp_exisiting_questions_client_2.csv,[Project_Title: AI-Powered Risk Assessment Mod...,"[-0.005094874094156965, -0.0018191395470256865...","[0.01474235542817176, 0.004943857159210323, 0....",...,537,559,2.1,1.0,1.0,0.9063,1.0,0.0,0.739027,0.956108
7,8,rfp_new_questions_client_100.csv,How do you ensure the ethical use of LLMs in y...,"What is the most similar question to: ""How do ...",RFP_Question: How do you manage ethical concer...,How do you manage ethical concerns in your LLM...,rfp_exisiting_questions_client_2.csv,[Project_Title: AI-Powered Risk Assessment Mod...,"[0.021543276731008216, 0.01490634165937239, 0....","[0.02458965545433629, 0.017371860934522247, 0....",...,607,635,2.6,1.0,1.0,0.921054,1.0,0.0,0.991749,0.966996
8,9,rfp_new_questions_client_100.csv,Can you describe the process of training your ...,"What is the most similar question to: ""Can you...","The most similar question to ""Can you describe...","Could you outline how you train your LLMs, inc...",rfp_exisiting_questions_client_2.csv,[Project_Title: AI-Powered Risk Assessment Mod...,"[-0.02396939689706394, 0.018455669672488594, 0...","[-0.018115209323870087, 0.023699326320744635, ...",...,605,666,2.9,1.0,1.0,0.898911,0.25,0.0,0.978817,0.915267
9,10,rfp_new_questions_client_100.csv,How do you handle the continuous learning and ...,"What is the most similar question to: ""How do ...",RFP_Question: How do you ensure your LLMs cont...,How do you ensure your LLMs continuously learn...,rfp_exisiting_questions_client_2.csv,[Project_Title: AI-Powered Risk Assessment Mod...,"[-0.00825047894972602, 0.021084556591604547, 0...","[-0.013420040554404636, 0.028803989073751236, ...",...,633,659,1.8,1.0,1.0,0.904746,0.2,0.4375,0.990151,0.960602


In [None]:
# Write the frame, including the RAGAS metric columns, to disk without the index column
ragas_results_df.to_csv(path_or_buf="rag_evaluation_results.csv", index=False)