# RAG for Question Similarity in RFPs

## Notebook setup

In [1]:
import pandas as pd

In [2]:
%pip install -qU langchain langchain-openai langchain-cohere


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
%pip install -qU qdrant-client lark


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [4]:
import os

import dotenv

# Pull variables from a local .env file into the process environment.
dotenv.load_dotenv()

# Fail fast when the OpenAI credentials are missing.
if "OPENAI_API_KEY" not in os.environ:
    raise Exception("OPENAI_API_KEY not found")

In [5]:
import textwrap
from IPython.display import HTML, display
from tabulate import tabulate


def _format_cell_text(text, width=50):
    """Private function to format a cell's text."""
    return "\n".join([textwrap.fill(line, width=width) for line in text.split("\n")])


def _format_dataframe_for_tabulate(df):
    """Return a copy of ``df`` with the text of its object-dtype columns wrapped."""
    wrapped = df.copy()

    # Object dtype is how pandas usually stores strings, so only those
    # columns are passed through the cell formatter.
    text_columns = [name for name in wrapped.columns if wrapped[name].dtype == object]
    for name in text_columns:
        wrapped[name] = wrapped[name].apply(_format_cell_text)
    return wrapped


def _dataframe_to_html_table(df):
    """Render ``df`` as an HTML table string using ``tabulate``."""
    return tabulate(
        df.values.tolist(),
        headers=df.columns.tolist(),
        tablefmt="html",
    )


def display_nice(df, num_rows=None):
    """Show ``df`` in the notebook as an HTML table with wrapped text.

    When ``num_rows`` is given, only the first ``num_rows`` rows are shown.
    """
    subset = df if num_rows is None else df.head(num_rows)
    html_table = _dataframe_to_html_table(_format_dataframe_for_tabulate(subset))
    display(HTML(html_table))

In [6]:
def print_dict_keys(data, indent=0):
    """Print the keys of ``data`` one per line, indenting nested dictionaries.

    Each nesting level is indented by four additional spaces. Only values
    that are themselves dictionaries are descended into.
    """
    prefix = ' ' * indent
    for name, child in data.items():
        print(prefix + str(name))
        if not isinstance(child, dict):
            continue
        print_dict_keys(child, indent + 4)

## Data preparation

### Load existing RFPs

In [7]:
# CSV files holding the existing RFP questions and answers
existing_rfp_paths = ["datasets/rag/rfp_existing_questions_client_2.csv"]

# Read every file and stack the rows into one DataFrame with a fresh index
existing_rfp_df = pd.concat(
    [pd.read_csv(csv_path) for csv_path in existing_rfp_paths],
    ignore_index=True,
)

In [8]:
existing_rfp_df

Unnamed: 0,Project_Title,RFP_Question_ID,RFP_Question,RFP_Answer,Area,Last_Accessed_At,Requester,Status
0,AI-Powered Risk Assessment Model Development f...,1,Can you discuss your expertise in creating AI-...,Our company has 15 years of experience in deve...,General,18/12/2022,Bank B,Awarded
1,AI-Powered Risk Assessment Model Development f...,2,How do you keep your AI applications current w...,We maintain a dedicated R&D team focused on in...,General,18/12/2022,Bank B,Awarded
2,AI-Powered Risk Assessment Model Development f...,3,Are your AI applications adaptable to specific...,"Absolutely, customization is a core aspect of ...",General,18/12/2022,Bank B,Awarded
3,AI-Powered Risk Assessment Model Development f...,4,What steps do you undertake to protect user pr...,User privacy and data security are paramount. ...,General,18/12/2022,Bank B,Awarded
4,AI-Powered Risk Assessment Model Development f...,5,What strategies do you employ to design user i...,Our design philosophy centers on simplicity an...,General,18/12/2022,Bank B,Awarded
5,AI-Powered Risk Assessment Model Development f...,6,Explain the support and maintenance services y...,"Post-launch, we offer comprehensive support an...",General,18/12/2022,Bank B,Awarded
6,AI-Powered Risk Assessment Model Development f...,7,How do you evaluate the effectiveness and impa...,Success measurement is tailored to each projec...,General,18/12/2022,Bank B,Awarded
7,AI-Powered Risk Assessment Model Development f...,8,How do you manage ethical concerns in your LLM...,We adhere to ethical AI practices by implement...,Large Language Models,18/12/2022,Bank B,Awarded
8,AI-Powered Risk Assessment Model Development f...,9,"Could you outline how you train your LLMs, inc...",Our LLM training process begins with the metic...,Large Language Models,18/12/2022,Bank B,Awarded
9,AI-Powered Risk Assessment Model Development f...,10,How do you ensure your LLMs continuously learn...,We implement advanced continuous learning mech...,Large Language Models,18/12/2022,Bank B,Awarded


In [9]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Collect the documents from every CSV file into one list. "Area" is
# requested as a metadata column rather than as part of the page content.
documents = []
for file_path in existing_rfp_paths:
    loader = CSVLoader(file_path=file_path, metadata_columns=["Area"])
    doc = loader.load()
    documents.extend(doc)

When using `CSVLoader`, each document represents a single row and includes its respective contents:

In [10]:
number_of_documents = 5

# Print the full representation of the first few documents
preview_documents = documents[:number_of_documents]
for i, document in enumerate(preview_documents):
    print("Document {}: {}".format(i + 1, document))

Document 1: page_content='Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing\nRFP_Question_ID: 1\nRFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?\nRFP_Answer: Our company has 15 years of experience in developing AI-based applications, with a strong portfolio in sectors such as healthcare, finance, and education. For instance, our project MediAI Insight for the healthcare industry demonstrated significant achievements in patient data analysis, resulting in a 30% reduction in diagnostic errors and a 40% improvement in treatment personalization. Our platform has engaged over 200 healthcare facilities, achieving a user satisfaction rate of 95%.\nLast_Accessed_At: 18/12/2022\nRequester: Bank B\nStatus: Awarded' metadata={'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General'}
Document 2: page_content='Project_Title: AI-Powered Risk Assessment

Accessing the page content of each document:

In [11]:
number_of_documents = 2

for i, document in enumerate(documents[:number_of_documents]):
    # Header line, the content itself, then a blank separator line
    print(f"Page content for document {i + 1}:", document.page_content, "", sep="\n")

Page content for document 1:
Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing
RFP_Question_ID: 1
RFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?
RFP_Answer: Our company has 15 years of experience in developing AI-based applications, with a strong portfolio in sectors such as healthcare, finance, and education. For instance, our project MediAI Insight for the healthcare industry demonstrated significant achievements in patient data analysis, resulting in a 30% reduction in diagnostic errors and a 40% improvement in treatment personalization. Our platform has engaged over 200 healthcare facilities, achieving a user satisfaction rate of 95%.
Last_Accessed_At: 18/12/2022
Requester: Bank B
Status: Awarded

Page content for document 2:
Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing
RFP_Question_ID: 2
RFP_Question: How do you keep your AI appli

Note that when adding metadata, it is appended to the default metadata, which consists of the row number and the source: 

In [12]:
number_of_documents = 5

for i, document in enumerate(documents[:number_of_documents]):
    document_metadata = document.metadata
    print(f"Metadata for document {i + 1}: {document_metadata}")

Metadata for document 1: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General'}
Metadata for document 2: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 1, 'Area': 'General'}
Metadata for document 3: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 2, 'Area': 'General'}
Metadata for document 4: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 3, 'Area': 'General'}
Metadata for document 5: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 4, 'Area': 'General'}


## Split the documents into chunks

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: target chunk size, overlap between neighbouring chunks,
# and a flag that records each chunk's start index in its metadata.
splitter_options = dict(chunk_size=500, chunk_overlap=10, add_start_index=True)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunks = text_splitter.split_documents(documents)

Get some general information about the chunks:

In [14]:
print(f"Number of chunks: {len(chunks)}")

Number of chunks: 98


See the maximum, minimum, and mean chunk lengths:

In [15]:
# Measure every chunk once and derive all three statistics from that list
lengths = [len(chunk.page_content) for chunk in chunks]

max_chunk_length = max(lengths)
min_chunk_length = min(lengths)
mean_chunk_length = sum(lengths) / len(chunks)

print(f"Maximum chunk length: {max_chunk_length}")
print(f"Minimum chunk length: {min_chunk_length}")
print(f"Mean chunk length: {mean_chunk_length}")

Maximum chunk length: 499
Minimum chunk length: 12
Mean chunk length: 267.6020408163265


Plot the distribution of chunks: 

In [16]:
import plotly.express as px

# Calculate lengths of each chunk's page_content
chunk_lengths = [len(chunk.page_content) for chunk in chunks]

# Creating a histogram of chunk lengths
fig = px.histogram(chunk_lengths, nbins=50, title="Distribution of Chunk Lengths")
fig.update_layout(
    xaxis_title="Chunk Length",
    yaxis_title="Count",
    bargap=0.2,
    showlegend=False
)

# Summary statistics shown in the annotation (<br> is a line break in Plotly text)
summary_text = (
    f"Min: {min(chunk_lengths)}<br>"
    f"Mean: {sum(chunk_lengths) / len(chunk_lengths):.1f}<br>"
    f"Max: {max(chunk_lengths)}"
)

# Add summary statistics as text on the plot. The annotation is pinned to the
# top-right corner of the plotting area ("paper" coordinates) so it does not
# sit on top of the bars.
fig.add_annotation(
    text=summary_text,
    xref="paper",
    yref="paper",
    x=1,
    y=1,
    xanchor="right",
    yanchor="top",
    align="right",
    showarrow=False
)

# Show the plot
fig.show()

Inspect the chunks: 

In [17]:
number_of_chunks = 5

# Slice with number_of_chunks: the previous `chunks[:i]` depended on a loop
# index left over from an earlier cell, so the number of chunks shown varied
# with execution order.
for index, chunk in enumerate(chunks[:number_of_chunks]):
    print(f"Chunk {index + 1}: {chunk}")

Chunk 1: page_content='Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing\nRFP_Question_ID: 1\nRFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?' metadata={'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General', 'start_index': 0}
Chunk 2: page_content='RFP_Answer: Our company has 15 years of experience in developing AI-based applications, with a strong portfolio in sectors such as healthcare, finance, and education. For instance, our project MediAI Insight for the healthcare industry demonstrated significant achievements in patient data analysis, resulting in a 30% reduction in diagnostic errors and a 40% improvement in treatment personalization. Our platform has engaged over 200 healthcare facilities, achieving a user satisfaction rate of' metadata={'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General',

See the page content of each chunk:

In [18]:
number_of_chunks = 5

for i, document in enumerate(chunks[:number_of_chunks]):
    # Header line, the chunk text, then a blank separator line
    print(f"Page content for chunk {i + 1}:", document.page_content, "", sep="\n")

Page content for chunk 1:
Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing
RFP_Question_ID: 1
RFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?

Page content for chunk 2:
RFP_Answer: Our company has 15 years of experience in developing AI-based applications, with a strong portfolio in sectors such as healthcare, finance, and education. For instance, our project MediAI Insight for the healthcare industry demonstrated significant achievements in patient data analysis, resulting in a 30% reduction in diagnostic errors and a 40% improvement in treatment personalization. Our platform has engaged over 200 healthcare facilities, achieving a user satisfaction rate of

Page content for chunk 3:
rate of 95%.

Page content for chunk 4:
Last_Accessed_At: 18/12/2022
Requester: Bank B
Status: Awarded

Page content for chunk 5:
Project_Title: AI-Powered Risk Assessment Model Development

See the metadata for individual chunks:

In [19]:
number_of_chunks = 5

for i, chunk in enumerate(chunks[:number_of_chunks]):
    chunk_metadata = chunk.metadata
    print(f"Metadata for chunk {i + 1}: {chunk_metadata}")



Metadata for chunk 1: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General', 'start_index': 0}
Metadata for chunk 2: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General', 'start_index': 234}
Metadata for chunk 3: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General', 'start_index': 723}
Metadata for chunk 4: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 0, 'Area': 'General', 'start_index': 736}
Metadata for chunk 5: {'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'row': 1, 'Area': 'General', 'start_index': 0}


Access the source of each chunk:

In [20]:
number_of_chunks = 5

for i, chunk in enumerate(chunks[:number_of_chunks]):
    chunk_source = chunk.metadata['source']
    print(f"Source for chunk {i + 1}: {chunk_source}")

Source for chunk 1: datasets/rag/rfp_existing_questions_client_2.csv
Source for chunk 2: datasets/rag/rfp_existing_questions_client_2.csv
Source for chunk 3: datasets/rag/rfp_existing_questions_client_2.csv
Source for chunk 4: datasets/rag/rfp_existing_questions_client_2.csv
Source for chunk 5: datasets/rag/rfp_existing_questions_client_2.csv


## Store chunks into a vectorstore

In [21]:
from langchain.vectorstores.chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embedding model used to index the chunks here; the evaluation loop later
# reuses it to embed questions, contexts and answers.
embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Build a Chroma vector store from the chunks using that embedding model.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings_model,
)

## Create evaluation dataset

In [22]:
# Load the RAG evaluation dataset and preview its first two rows

rag_evaluation_df = pd.read_csv("datasets/rag/rag_evaluation_dataset_v1.csv")

display_nice(rag_evaluation_df, num_rows=2)


id,new_rfp,new_question,question_to_llm,answer,ground_truth,existing_rfp
1,rfp_new_questions_client_100.csv,"What is your experience in developing AI-based applications, and can you provide examples of successful projects?","What is the most similar question to: ""What is your experience in developing AI-based applications, and can you provide examples of successful projects?""",,Can you discuss your expertise in creating AI- driven applications and share examples of your successful implementations?,rfp_exisiting_questions_client_2.csv
2,rfp_new_questions_client_100.csv,How do you ensure your AI-based apps remain up-to- date with the latest AI advancements and technologies?,"What is the most similar question to: ""How do you ensure your AI-based apps remain up-to-date with the latest AI advancements and technologies?""",,How do you keep your AI applications current with ongoing advancements in artificial intelligence?,rfp_exisiting_questions_client_2.csv


In [23]:
from langchain_openai import ChatOpenAI

# NOTE: `llm` is created again, with the same model and temperature, in the
# cell that builds the chain below.
llm = ChatOpenAI(model="gpt-4-turbo", temperature=0.0)
# k is the number of chunks the retriever is asked to return per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})

In [24]:
from langchain.prompts import ChatPromptTemplate

template = """Answer the question based only on the following context. 
If you cannot answer the question with the context, please respond with 'I don't know':

### CONTEXT
{context}

### QUESTION
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [25]:
from operator import itemgetter

from langchain_openai import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

# How the chain processes an input dict of the form {"question": ...}:
# 1. Build a dict with two keys: "context" is produced by sending the question
#    text to the retriever, and "question" is the question text itself.
# 2. RunnablePassthrough.assign(context=...) re-assigns "context" from the
#    existing "context" key, so the dict is forwarded with the same contents.
# 3. Build the output dict: "answer" is the LLM reply to the prompt (which is
#    formatted from "context" and "question"), and "context" carries the
#    retrieved documents through so they can be inspected next to the answer.

llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)

rag_chain = (
    
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"answer": prompt | llm, "context": itemgetter("context")}

)

Ask a question to test the chain:

In [26]:
question = "Find a similar question as this one: 'What is your experience in developing AI-based applications?'"
response = rag_chain.invoke({"question" : question})
print(response)

{'answer': AIMessage(content='RFP_Question_ID: 1\nRFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?', response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 2074, 'total_tokens': 2106}, 'model_name': 'gpt-4-turbo', 'system_fingerprint': 'fp_76f018034d', 'finish_reason': 'stop', 'logprobs': None}, id='run-91268de3-7ad9-4e91-b754-ac54fa72827c-0'), 'context': [Document(page_content='Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing\nRFP_Question_ID: 1\nRFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?', metadata={'Area': 'General', 'row': 0, 'source': 'datasets/rag/rfp_existing_questions_client_2.csv', 'start_index': 0}), Document(page_content='Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing\nRFP_Question_ID: 2\nRFP_Question: How do you ke

As defined in the chain above, the RAG response includes two fields: `answer` and `context`:

In [27]:
print_dict_keys(response)

answer
context


Inspecting the answer, we see that the `rag_chain` is functioning correctly and identifies the most similar question in the `vectorstore`:

In [28]:
# Show the question followed by the text of the model's answer
answer_text = response["answer"].content

print("Question:", question, sep="\n")
print()
print("Answer:", answer_text, sep="\n")

Question:
Find a similar question as this one: 'What is your experience in developing AI-based applications?'

Answer:
RFP_Question_ID: 1
RFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?


Next, we inspect the content of the `answer` and the `context` retrieved based on the `question`. The context should contain the `k` chunks that are most relevant to the question. Remember that we set `k` in the `retriever` earlier. These `k` chunks are pasted into the prompt as text, so that the LLM builds its answer from the stored content that is closest to the question in the embedding space.

In [29]:
number_of_chunks = 5

for i, chunk in enumerate(response["context"][:number_of_chunks]):
    # Chunks are numbered from 1 for display; a blank line separates them
    print(f"Content for chunk {i + 1}:", chunk.page_content, "", sep="\n")

Content for chunk 1:
Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing
RFP_Question_ID: 1
RFP_Question: Can you discuss your expertise in creating AI-driven applications and share examples of your successful implementations?

Content for chunk 2:
Project_Title: AI-Powered Risk Assessment Model Development for Loan Processing
RFP_Question_ID: 2
RFP_Question: How do you keep your AI applications current with ongoing advancements in artificial intelligence?

Content for chunk 3:
RFP_Answer: Our company has 15 years of experience in developing AI-based applications, with a strong portfolio in sectors such as healthcare, finance, and education. For instance, our project MediAI Insight for the healthcare industry demonstrated significant achievements in patient data analysis, resulting in a 30% reduction in diagnostic errors and a 40% improvement in treatment personalization. Our platform has engaged over 200 healthcare facilities, achieving a user satisfaction 

We now inspect the `response_metadata` object to understand its contents and identify what could be useful to incorporate in our RAG evaluation dataset:

In [30]:
print(response["answer"].response_metadata)

{'token_usage': {'completion_tokens': 32, 'prompt_tokens': 2074, 'total_tokens': 2106}, 'model_name': 'gpt-4-turbo', 'system_fingerprint': 'fp_76f018034d', 'finish_reason': 'stop', 'logprobs': None}


In [31]:
print_dict_keys(response["answer"].response_metadata)

token_usage
    completion_tokens
    prompt_tokens
    total_tokens
model_name
system_fingerprint
finish_reason
logprobs


Extracting the LLM used:

In [32]:
print(f"Model: {response['answer'].response_metadata['model_name']}")

Model: gpt-4-turbo


As we showed earlier, we can also extract some token usage statistics that can help us understand and optimize our interactions with the language model for cost-effectiveness and efficiency.

- **Prompt tokens**: tokens that form the input text sent to the language model. This includes all the text provided to the LLM to generate a response.
- **Completion tokens**: number of tokens in the generated text or output from the model.
- **Total tokens**: total number of tokens processed by the model. It is the sum of both `prompt_tokens` and `completion_tokens`. 

In [33]:
# Look up the token usage dictionary once, then report each counter
token_usage = response['answer'].response_metadata['token_usage']

print(f"Completion tokens: {token_usage['completion_tokens']}")
print(f"Prompt tokens: {token_usage['prompt_tokens']}")
print(f"Total tokens: {token_usage['total_tokens']}")

Completion tokens: 32
Prompt tokens: 2074
Total tokens: 2106


We will now expand our evaluation dataset to capture the outputs and metadata generated by the RAG pipeline, which will be used later when validating it. We will add the following columns to our dataframe: `context`, the embeddings of the question, answer and context, the similarity scores between them, `model`, `completion_tokens`, `prompt_tokens`, `total_tokens`, and `response_time`.

In [34]:
# Columns that the evaluation loop fills in, listed in the order they are
# added to the dataframe. Each one starts out as an empty string.
result_columns = [
    'context',
    'question_embeddings',
    'answer_embeddings',
    'context_embeddings',
    'similarity_score_question_vs_context',
    'similarity_score_question_vs_answer',
    'similarity_score_context_vs_answer',
    'model',
    'completion_tokens',
    'prompt_tokens',
    'total_tokens',
    'response_time',
]

for column_name in result_columns:
    rag_evaluation_df[column_name] = ''

We would also like to compute a few similarity metrics between embeddings, such as cosine similarity or Euclidean distance:

In [35]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def cosine_similarity_score(embedding1, embedding2):
    """
    Return the cosine similarity of two embedding vectors.

    Parameters:
    - embedding1 (array-like): First embedding vector.
    - embedding2 (array-like): Second embedding vector.

    Returns:
    - float: Cosine similarity between the two embeddings.

    Note: Cosine similarity is symmetric, so swapping the arguments gives the same score.
    """
    # sklearn's cosine_similarity works on 2D arrays, so each embedding is
    # turned into a single-row matrix first.
    first_row, second_row = (
        np.array(vector).reshape(1, -1) for vector in (embedding1, embedding2)
    )

    # The result is a 1x1 matrix; its only entry is the score.
    similarity_matrix = cosine_similarity(first_row, second_row)
    return similarity_matrix[0][0]

In [36]:
import numpy as np

def euclidean_distance(embedding1, embedding2):
    """
    Return the Euclidean distance between two embedding vectors.

    Parameters:
    - embedding1 (array-like): First embedding vector.
    - embedding2 (array-like): Second embedding vector.

    Returns:
    - float: Euclidean distance between the two embeddings.
    """
    # Lists and tuples are accepted: both inputs are converted to NumPy
    # arrays before subtracting.
    difference = np.array(embedding1) - np.array(embedding2)

    # The norm of the difference vector is the distance.
    return np.linalg.norm(difference)

In [37]:
import time

number_of_iterations = 23

# Answers are text. Make sure the column can hold strings even when pandas
# loaded it as an all-missing (float) column.
rag_evaluation_df["answer"] = rag_evaluation_df["answer"].astype(object)

for i, (index, row) in enumerate(rag_evaluation_df.iloc[:number_of_iterations].iterrows()):
    print(f"Processing row {i}...")

    # Only rows without an answer are sent to the RAG chain. Depending on how
    # pandas parses the CSV, an unanswered row holds either the literal string
    # "None" or a missing value (NaN), so both are treated as "no answer yet".
    answer_missing = pd.isna(row["answer"]) or row["answer"] == "None"
    if not answer_missing:
        continue

    print(f"Answer is 'None' for question ID {index}. Invoking RAG model...")

    # perf_counter is a monotonic clock intended for measuring durations;
    # unlike time.time() it is not affected by system clock adjustments.
    start_time = time.perf_counter()

    # Invoke the RAG model with the question from the current row
    response = rag_chain.invoke({"question": row["question_to_llm"]})

    end_time = time.perf_counter()

    # Store the response time in seconds, rounded to one decimal
    rag_evaluation_df.at[index, 'response_time'] = round(end_time - start_time, 1)

    # Store whatever response comes from the LLM
    answer_text = response["answer"].content
    rag_evaluation_df.at[index, "answer"] = answer_text
    print(f"Question ID {index} answer updated with the response from the RAG model.")

    # Store the context included in the prompt (chunks separated by blank lines)
    context = "\n\n".join(chunk.page_content for chunk in response["context"])
    rag_evaluation_df.at[index, "context"] = context

    # Compute and store embeddings for the question, context and answer
    print("Computing embeddings for the question...")
    question_embeddings = np.array(embeddings_model.embed_query(row["question_to_llm"]))
    rag_evaluation_df.at[index, 'question_embeddings'] = question_embeddings

    print("Computing embeddings for the context...")
    context_embeddings = np.array(embeddings_model.embed_query(context))
    rag_evaluation_df.at[index, 'context_embeddings'] = context_embeddings

    print("Computing embeddings for the answer...")
    answer_embeddings = np.array(embeddings_model.embed_query(answer_text))
    rag_evaluation_df.at[index, 'answer_embeddings'] = answer_embeddings

    # Compute similarity measures between embeddings
    print("Computing cosine similarity between question and context...")
    rag_evaluation_df.at[index, 'similarity_score_question_vs_context'] = cosine_similarity_score(question_embeddings, context_embeddings)

    print("Computing cosine similarity between question and answer...")
    rag_evaluation_df.at[index, 'similarity_score_question_vs_answer'] = cosine_similarity_score(question_embeddings, answer_embeddings)

    print("Computing cosine similarity between context and answer...")
    rag_evaluation_df.at[index, 'similarity_score_context_vs_answer'] = cosine_similarity_score(context_embeddings, answer_embeddings)

    # Store some metadata such as model name and token statistics
    response_metadata = response["answer"].response_metadata
    token_usage = response_metadata["token_usage"]
    rag_evaluation_df.at[index, "model"] = response_metadata["model_name"]
    rag_evaluation_df.at[index, "completion_tokens"] = token_usage["completion_tokens"]
    rag_evaluation_df.at[index, "prompt_tokens"] = token_usage["prompt_tokens"]
    rag_evaluation_df.at[index, "total_tokens"] = token_usage["total_tokens"]

print("Processing complete.")

Processing row 0...
Answer is 'None' for question ID 0. Invoking RAG model...
Question ID 0 answer updated with the response from the RAG model.
Computing embeddings for the question...
Computing embeddings for the context...
Computing embeddings for the answer...
Computing cosine similarity between question and context...
Computing cosine similarity between question and answer...
Computing cosine similarity between context and answer...
Processing row 1...
Answer is 'None' for question ID 1. Invoking RAG model...
Question ID 1 answer updated with the response from the RAG model.
Computing embeddings for the question...
Computing embeddings for the context...
Computing embeddings for the answer...
Computing cosine similarity between question and context...
Computing cosine similarity between question and answer...
Computing cosine similarity between context and answer...
Processing row 2...
Answer is 'None' for question ID 2. Invoking RAG model...
Question ID 2 answer updated with the 

In [38]:
rag_evaluation_df

Unnamed: 0,id,new_rfp,new_question,question_to_llm,answer,ground_truth,existing_rfp,context,question_embeddings,answer_embeddings,context_embeddings,similarity_score_question_vs_context,similarity_score_question_vs_answer,similarity_score_context_vs_answer,model,completion_tokens,prompt_tokens,total_tokens,response_time
0,1,rfp_new_questions_client_100.csv,What is your experience in developing AI-based...,"What is the most similar question to: ""What is...",RFP_Question_ID: 1\nRFP_Question: Can you disc...,Can you discuss your expertise in creating AI-...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.012242779808196847, -0.02875495641236209, ...","[0.0310747014081873, -0.027107230157425624, 0....","[-0.002070941676414154, 0.016138519678307824, ...",0.585144,0.725917,0.69799,gpt-4-turbo,32,2050,2082,3.0
1,2,rfp_new_questions_client_100.csv,How do you ensure your AI-based apps remain up...,"What is the most similar question to: ""How do ...",RFP_Question: How do you keep your AI applicat...,How do you keep your AI applications current w...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.021238304961058666, -0.002903160656478289,...","[0.009167934150736499, 0.004675850898067104, 0...","[0.003495793715123486, 0.029037076361042993, 0...",0.583265,0.783871,0.681394,gpt-4-turbo,20,2178,2198,2.2
2,3,rfp_new_questions_client_100.csv,Can your AI-based applications be customized t...,"What is the most similar question to: ""Can you...",RFP_Question: Are your AI applications adaptab...,Are your AI applications adaptable to specific...,rfp_exisiting_questions_client_2.csv,"RFP_Answer: Absolutely, customization is a cor...","[-0.024933725810230647, -0.00398689651570446, ...","[-0.009066043640488342, 0.004352435574698996, ...","[0.00557660436251417, 0.017872921644696806, 0....",0.544418,0.724164,0.650534,gpt-4-turbo,18,2110,2128,6.1
3,4,rfp_new_questions_client_100.csv,What measures do you take to ensure user priva...,"What is the most similar question to: ""What me...",RFP_Question: What steps do you undertake to p...,What steps do you undertake to protect user pr...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.011542554119771953, -0.012979928523676116,...","[0.01756528858249858, -0.0021651658480433076, ...","[0.005625466015599571, 0.027107654769665805, 0...",0.614859,0.763888,0.690945,gpt-4-turbo,22,2136,2158,2.3
4,5,rfp_new_questions_client_100.csv,How do you approach user interface and experie...,"What is the most similar question to: ""How do ...",RFP_Question: What strategies do you employ to...,What strategies do you employ to design user i...,rfp_exisiting_questions_client_2.csv,RFP_Answer: Our design philosophy centers on s...,"[-0.022403337192397944, -0.003745948452053383,...","[0.012766196768228667, 0.02172657166295028, 0....","[0.003870411601094043, 0.020839706984124785, 0...",0.498187,0.790653,0.596217,gpt-4-turbo,26,2100,2126,3.0
5,6,rfp_new_questions_client_100.csv,Describe your support and maintenance services...,"What is the most similar question to: ""Describ...",RFP_Question_ID: 6\nRFP_Question: Explain the ...,Explain the support and maintenance services y...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.02591718013947877, 0.026053946723562156, 0...","[0.0030569415769184254, 0.032351002340196805, ...","[-0.004738790191233332, 0.012303232755733858, ...",0.574956,0.769456,0.641872,gpt-4-turbo,30,2080,2110,5.2
6,7,rfp_new_questions_client_100.csv,How do you measure the success and impact of y...,"What is the most similar question to: ""How do ...","The most similar question to ""How do you measu...",How do you evaluate the effectiveness and impa...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.005094874094156965, -0.0018191395470256865...","[-0.00878038568775405, -0.009270509111679268, ...","[0.0035890195577847014, 0.0202937935330464, 0....",0.590167,0.939786,0.617473,gpt-4-turbo,53,2035,2088,4.2
7,8,rfp_new_questions_client_100.csv,How do you ensure the ethical use of LLMs in y...,"What is the most similar question to: ""How do ...",RFP_Question_ID: 8\nRFP_Question: How do you m...,How do you manage ethical concerns in your LLM...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[0.021570483446575122, 0.014879887146777028, 0...","[0.026099357489269805, 0.012381280646806315, 0...","[0.006703774816147418, 0.014809646435435513, 0...",0.617357,0.829949,0.669105,gpt-4-turbo,37,2063,2100,4.3
8,9,rfp_new_questions_client_100.csv,Can you describe the process of training your ...,"What is the most similar question to: ""Can you...",RFP_Question_ID: 9\nRFP_Question: Could you ou...,"Could you outline how you train your LLMs, inc...",rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.02396939689706394, 0.018455669672488594, 0...","[0.011562094135351562, 0.019687660111660777, 0...","[0.013534342148055555, 0.016004392023382625, 0...",0.568258,0.761843,0.668359,gpt-4-turbo,38,2199,2237,3.8
9,10,rfp_new_questions_client_100.csv,How do you handle the continuous learning and ...,"What is the most similar question to: ""How do ...","The most similar question to ""How do you handl...",How do you ensure your LLMs continuously learn...,rfp_exisiting_questions_client_2.csv,Project_Title: AI-Powered Risk Assessment Mode...,"[-0.008237089426533709, 0.02109827336666294, 0...","[-0.014028603505788242, 0.019560468426855514, ...","[0.011712163709805153, 0.005895829305481538, 0...",0.527055,0.956986,0.53162,gpt-4-turbo,58,2203,2261,4.6


In [39]:
# Save to CSV
rag_evaluation_df.to_csv('rag_evaluation_results.csv', index=False)

In [None]:
display_nice(rag_evaluation_df, num_rows=5)

In [None]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    context_relevancy,
    answer_correctness,
    answer_similarity
)

from ragas.metrics.critique import harmfulness
from ragas import evaluate

def create_ragas_dataset(rag_pipeline, eval_dataset):
    """Run every evaluation question through a RAG pipeline and collect the results.

    Args:
        rag_pipeline: Object with an ``invoke`` method. It is called with
            ``{"question": <question>}`` and must return a mapping holding a
            ``"response"`` entry (with a ``.content`` attribute) and a
            ``"context"`` entry (an iterable of objects with ``.page_content``).
        eval_dataset: Iterable of rows, each indexable by ``"question"`` and
            ``"ground_truth"``.

    Returns:
        The result of ``Dataset.from_pandas`` applied to a DataFrame with the
        columns ``question``, ``answer``, ``contexts`` and ``ground_truths``
        (one row per evaluation row; ``ground_truths`` is a one-element list).
    """
    records = []
    for example in tqdm(eval_dataset):
        # One pipeline call per evaluation row.
        result = rag_pipeline.invoke({"question": example["question"]})
        record = {
            "question": example["question"],
            "answer": result["response"].content,
            "contexts": [doc.page_content for doc in result["context"]],
            "ground_truths": [example["ground_truth"]],
        }
        records.append(record)
    return Dataset.from_pandas(pd.DataFrame(records))

def evaluate_ragas_dataset(ragas_dataset):
    """Evaluate ``ragas_dataset`` with the notebook's fixed set of ragas metrics.

    Args:
        ragas_dataset: Dataset passed unchanged as the first argument of
            ``evaluate``.

    Returns:
        Whatever ``evaluate`` returns for the dataset and the metric list below.
    """
    # Metric order is kept exactly as configured for the evaluation run.
    selected_metrics = [
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
        context_relevancy,
        answer_correctness,
        answer_similarity,
    ]
    return evaluate(ragas_dataset, metrics=selected_metrics)

In [None]:
# Score the baseline QA dataset with the metrics configured in
# evaluate_ragas_dataset.
# NOTE(review): basic_qa_ragas_dataset must already exist in the kernel;
# presumably it is the output of create_ragas_dataset(...) — confirm the cell
# that builds it runs before this one.
basic_qa_result = evaluate_ragas_dataset(basic_qa_ragas_dataset)

In [None]:
# Expose the existing vector store as a retriever. search_kwargs={"k": 10} is
# handed to the store's search; presumably k is the number of documents
# returned per query — confirm for the vector store in use.
naive_retriever = vectorstore.as_retriever(search_kwargs={"k" : 10})

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text for the RAG chain. The backslash right after the opening triple
# quote keeps the string from starting with a newline. The template has two
# placeholders, {question} and {context}, which must be supplied when the
# prompt is formatted.
RAG_TEMPLATE = """\
You are a helpful and kind assistant. Use the context provided below to answer the question.

If you do not know the answer, or are unsure, say you don't know.

Query:
{question}

Context:
{context}
"""

# Build the chat prompt object from the raw template string.
rag_prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

In [None]:
# Disabled experiment: flip `run` to True to build a RAG chain that returns the
# retrieved source documents alongside the generated answer.
# NOTE(review): the body uses `Document`, `prompt`, `llm` and `retriever`, none
# of which are defined in this cell (`Document` is only imported in a later
# cell) — confirm they exist in the kernel before enabling this.
run = False
if run:

    from typing import List

    from langchain_core.runnables import RunnableParallel, RunnablePassthrough
    from langchain_core.output_parsers import StrOutputParser

    # Formats a list of Document objects into a single string: one
    # "Content: ...\nSource: ..." entry per document, entries joined by a blank line.
    # Each document's metadata is indexed with 'source', so a document whose
    # metadata lacks that key will raise here.
    def format_docs(docs: List[Document]) -> str:
        return "\n\n".join(
            f"Content: {doc.page_content}\nSource: {doc.metadata['source']}" for doc in docs
        )

    # Answer-generation chain. It replaces the 'source_documents' entry of its
    # input with the string produced by 'format_docs', then pipes the result
    # through `prompt`, `llm` and a string output parser.
    # NOTE(review): `prompt` must accept the keys produced here
    # ('source_documents' and 'question') — verify against the prompt in use.
    rag_chain_from_docs = (
        RunnablePassthrough.assign(
            source_documents=(lambda x: format_docs(x["source_documents"]))
        )
        | prompt  # Fills the prompt template from the mapping built above.
        | llm     # Uses the language model to generate an answer.
        | StrOutputParser()  # Parses the output from the language model into a string format.
    )

    # Top-level chain. The RunnableParallel step builds a mapping with two keys
    # from the chain input: 'source_documents' (output of `retriever`) and
    # 'question' (the input itself). `.assign(answer=...)` then adds an 'answer'
    # key computed by 'rag_chain_from_docs' from that mapping.
    rag_chain = RunnableParallel(
        {
            "source_documents": retriever,  # Presumably the documents retrieved for the incoming question.
            "question": RunnablePassthrough(),  # Passes the question through without modification.
        }
    ).assign(answer=rag_chain_from_docs)  # Keeps the keys above and adds the generated 'answer'.

In [None]:
import hashlib
import json
from langchain_core.documents import Document

def stable_hash_meta(doc: "Document") -> str:
    """
    Return a stable SHA-1 hex digest derived from a document's metadata.

    The metadata is serialized to JSON with sorted keys, so two documents whose
    metadata hold the same key/value pairs produce the same hash regardless of
    key insertion order. Values JSON cannot encode natively (for example dates
    or paths) are serialized through ``str()`` instead of raising ``TypeError``;
    the hash is only stable for such values if their ``str()`` form is stable.

    Args:
        doc: Object exposing a ``metadata`` attribute (e.g. a LangChain Document).

    Returns:
        The 40-character hexadecimal SHA-1 digest of the serialized metadata.

    Raises:
        ValueError: If ``doc`` has no ``metadata`` attribute.
    """
    # Guard only the attribute access so unrelated errors raised during
    # serialization are not misreported as "missing metadata".
    try:
        metadata = doc.metadata
    except AttributeError as exc:
        raise ValueError("Document does not have metadata.") from exc

    # sort_keys gives a canonical key order; default=str is the fallback for
    # values that are not JSON-native.
    metadata_json = json.dumps(metadata, sort_keys=True, default=str)
    # SHA-1 is used here as a fingerprint/identifier, not for security.
    return hashlib.sha1(metadata_json.encode()).hexdigest()

In [None]:
#splits_ids = [{"doc": split, "id": stable_hash_meta(split)} for split in splits]

#existing_ids = vectorstore.get()["ids"]

#new_splits_ids = [split for split in splits_ids if split["id"] not in existing_ids]