In [3]:
import os
from dotenv import load_dotenv
load_dotenv(encoding='utf-8')

True

# RAG pipeline

## Vector Store and Retriever

In [4]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from pymongo import MongoClient
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from langchain_community.llms import Cohere
# from llama_index.postprocessor.flag_embedding_reranker import (
#     FlagEmbeddingReranker,
# )
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder


* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed


In [5]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

# llm = ChatOpenAI(model=os.getenv("DEFAULT_OPENAI_MODEL")) # DEFAULT_OPENAI_MODEL='gpt-4o-mini-2024-07-18'
llm = ChatOpenAI(model="gpt-4o-mini") # DEFAULT_OPENAI_MODEL='gpt-4o-mini-2024-07-18'

# embedding_model=OpenAIEmbeddings(model=os.getenv("DEFAULT_OPENAI_EMBEDDING"), disallowed_special=())
embedding_model=OpenAIEmbeddings(disallowed_special=())

In [6]:
# Define MongoDB vector database
client = MongoClient(os.getenv("ATLAS_CONNECTION_STRING"))
db_name = os.getenv("db_name")
collection_name="enterprise_data"
atlas_collection = client[db_name][collection_name]
index_name = "vector_index_erp"
compression_retriever_model = "cohere"

In [7]:

# Define MongoDB vector database
client = MongoClient(os.getenv("ATLAS_CONNECTION_STRING"))
db_name = os.getenv("db_name")
collection_name="enterprise_data"
atlas_collection = client[db_name][collection_name]
index_name = "vector_index_erp"

# Define vector store and retriever
vector_store = MongoDBAtlasVectorSearch(
    embedding = embedding_model,
    collection = atlas_collection,
    index_name = index_name
)

retriever = vector_store.as_retriever(
    search_type = "similarity",
    search_kwargs = { "k": 20}  # default "score_threshold": 0.75 
)

In [71]:
# Helper function for printing docs
def pretty_print_docs(docs):
    print(
        f"\n{'-' * 100}\n".join(
            [f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]
        )
    )

## Reranking Functions

In [8]:
# Get the compressed retriever
# from FlagEmbedding import FlagLLMReranker
# from flashrank import Ranker, RerankRequest
# from langchain_cohere import CohereRerank
def get_compressed_retriever(model_name):
    if model_name == "cohere":
        compressor = CohereRerank(model="rerank-english-v3.0")
        compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=retriever
        )
    
    if model_name == "crossEncoderReranker":
        model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
        compressor = CrossEncoderReranker(model=model, top_n=3)
        compression_retriever = ContextualCompressionRetriever(
            base_compressor=compressor, base_retriever=retriever
        )
    
    if model_name == "gptCompressor":
        llm = ChatOpenAI(temperature=0, model='gpt-4')
        compressor = LLMChainExtractor.from_llm(llm)
        compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=retriever
        )

    return compression_retriever

## RAG Generation

### gptCompressor

In [9]:
# Define a prompt template
import pprint
def call_gptCompressor(question):

   question = question['question']

   # Get the compression retriever to rerank and get top 3 documents only
   compression_retriever = get_compressed_retriever("gptCompressor")

   preamble = "" # read from cohere front end or use the input to the API
   #question = 
   SAFETY_PREAMBLE = "The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral."
   BASIC_RULES = "You are a powerful conversational AI trained by openAI to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions."
   TASK_CONTEXT = "You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging."
   STYLE_GUIDE = "Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling."
   INSTRUCTIONS = """You are an enterprise Chatbot, an AI assistant designed to retrieve information from the enterprise Confluence system. 
   You specialize in providing accurate answers related to various departments like Marketing, IT, HR, Finance, and Corporate Communications. 
               Use the following pieces of context to answer the question at the end.
               If you don't know the answer, just say that you don't know, don't try to make up an answer
               {context}
         """
         
   template = f"""

      {SAFETY_PREAMBLE}
      {BASIC_RULES}
      {TASK_CONTEXT}
      {STYLE_GUIDE}
      {INSTRUCTIONS}

   """
   if preamble:
      template += f"""{preamble}\n\n"""


   template +=  f"""Question: {question}\n\n"""

   custom_rag_prompt = PromptTemplate.from_template(template)
   llm = ChatOpenAI(model="gpt-4o-mini")
   
   def remove_rep(docs):
      retrieved_docs_content = []
      contexts = []
      for doc in docs:
            if doc.page_content not in retrieved_docs_content:
                  retrieved_docs_content.append(doc.page_content)
                  contexts.append(doc)
      return contexts
   
   def format_docs(docs):
      contexts = remove_rep(docs)
      return "\n\n".join(doc.page_content for doc in contexts)

   # Construct a chain to answer questions on your data
   rag_chain = (
      { "context": compression_retriever | format_docs, "question": RunnablePassthrough()}  #Insted of retriever add compression_retriever here
      | custom_rag_prompt
      | llm
      | StrOutputParser()
   )

   # Prompt the chain
   question = question
   answer = rag_chain.invoke(question)
   retrieved_docs = remove_rep(retriever.invoke(question))


   return{
      'answer': answer,
      'contexts': retrieved_docs
      }

In [10]:
# Test sample
question = {'question': "What are the steps to install and configure the Tech Innovator Vector Database?"}
answer = call_gptCompressor(question)
print(answer['answer'][:150])

To install and configure the Tech Innovator Vector Database, follow these steps:

### Step 1: Download the Installer
1. Visit the Tech Innovator Vecto


In [11]:
answer['contexts']

[Document(metadata={'_id': '66d8163a0533e009aa2d4ab6', 'pageid': '819215', 'department': 'IT', 'title': 'Tech Innovator Vector Database Installation Guide'}, page_content='the latest version of Java Development Kit (JDK).Step 1: Download the InstallerVisit the Tech Innovator Vector Database download page.Select the appropriate version for your operating system.Download'),
 Document(metadata={'_id': '66d816710533e009aa2d4f02', 'pageid': '819215', 'department': 'IT', 'title': 'Tech Innovator Vector Database Installation Guide'}, page_content='is located.Run the following command to start the installation:sudo ./install_vector_db.shFollow the on-screen instructions to complete the installation.Step 3: Configure the DatabaseOpen the'),
 Document(metadata={'_id': '66d8163a0533e009aa2d4abe', 'pageid': '819215', 'department': 'IT', 'title': 'Tech Innovator Vector Database Installation Guide'}, page_content='steps.Check the log files located in /var/log/vector_db/ for error messages.Conclusion

### Cohere rerank

In [27]:
# Define a prompt template
import pprint
def call_co_rerank(question):

   question = question['question']

   # Get the compression retriever to rerank and get top 3 documents only
   compression_retriever = get_compressed_retriever("cohere")

   preamble = "" # read from cohere front end or use the input to the API
   #question = 
   SAFETY_PREAMBLE = "The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral."
   BASIC_RULES = "You are a powerful conversational AI trained by openAI to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions."
   TASK_CONTEXT = "You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging."
   STYLE_GUIDE = "Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling."
   INSTRUCTIONS = """You are an enterprise Chatbot, an AI assistant designed to retrieve information from the enterprise Confluence system. 
   You specialize in providing accurate answers related to various departments like Marketing, IT, HR, Finance, and Corporate Communications. 
               Use the following pieces of context to answer the question at the end.
               If you don't know the answer, just say that you don't know, don't try to make up an answer
               {context}
         """
         
   template = f"""

      {SAFETY_PREAMBLE}
      {BASIC_RULES}
      {TASK_CONTEXT}
      {STYLE_GUIDE}
      {INSTRUCTIONS}

   """
   if preamble:
      template += f"""{preamble}\n\n"""


   template +=  f"""Question: {question}\n\n"""

   custom_rag_prompt = PromptTemplate.from_template(template)

   #llm = get_llm_model("openai")
   # llm = ChatOpenAI(model=os.getenv("DEFAULT_OPENAI_MODEL"))
   llm = ChatOpenAI(model="gpt-4o-mini")
   
   def remove_rep(docs):
      docs_content = []
      unique_docs = []
      for doc in docs:
            if doc.page_content not in docs_content:
                  docs_content.append(doc.page_content)
                  unique_docs.append(doc)
      return unique_docs
   
   def format_docs(docs):
      contexts = remove_rep(docs)
      return "\n\n".join(doc.page_content for doc in contexts)

   # Construct a chain to answer questions on your data
   rag_chain = (
      { "context": compression_retriever | format_docs, "question": RunnablePassthrough()}  #Insted of retriever add compression_retriever here
      | custom_rag_prompt
      | llm
      | StrOutputParser()
   )

   # Prompt the chain
   question = question
   answer = rag_chain.invoke(question)
   retrieved_docs = remove_rep(compression_retriever.invoke(question))


   return{
      'answer': answer,
      'contexts': retrieved_docs
      }

### crossEncoderReranker

In [35]:
# Define a prompt template
import pprint
def call_cross_encoder(question):

   question = question['question']

   # Get the compression retriever to rerank and get top 3 documents only
   compression_retriever = get_compressed_retriever("crossEncoderReranker")

   preamble = "" # read from cohere front end or use the input to the API
   #question = 
   SAFETY_PREAMBLE = "The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral."
   BASIC_RULES = "You are a powerful conversational AI trained by openAI to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions."
   TASK_CONTEXT = "You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging."
   STYLE_GUIDE = "Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling."
   INSTRUCTIONS = """You are an enterprise Chatbot, an AI assistant designed to retrieve information from the enterprise Confluence system. 
   You specialize in providing accurate answers related to various departments like Marketing, IT, HR, Finance, and Corporate Communications. 
               Use the following pieces of context to answer the question at the end.
               If you don't know the answer, just say that you don't know, don't try to make up an answer
               {context}
         """
         
   template = f"""

      {SAFETY_PREAMBLE}
      {BASIC_RULES}
      {TASK_CONTEXT}
      {STYLE_GUIDE}
      {INSTRUCTIONS}

   """
   if preamble:
      template += f"""{preamble}\n\n"""


   template +=  f"""Question: {question}\n\n"""

   custom_rag_prompt = PromptTemplate.from_template(template)

   #llm = get_llm_model("openai")
   # llm = ChatOpenAI(model=os.getenv("DEFAULT_OPENAI_MODEL"))
   llm = ChatOpenAI(model="gpt-4o-mini")
   
   def remove_rep(docs):
      retrieved_docs_content = []
      contexts = []
      for doc in docs:
            if doc.page_content not in retrieved_docs_content:
                  retrieved_docs_content.append(doc.page_content)
                  contexts.append(doc)
      return contexts
   
   def format_docs(docs):
      contexts = remove_rep(docs)
      return "\n\n".join(doc.page_content for doc in contexts)

   # Construct a chain to answer questions on your data
   rag_chain = (
      { "context": compression_retriever | format_docs, "question": RunnablePassthrough()}  #Insted of retriever add compression_retriever here
      | custom_rag_prompt
      | llm
      | StrOutputParser()
   )

   # Prompt the chain
   question = question
   answer = rag_chain.invoke(question)
   retrieved_docs = remove_rep(compression_retriever.invoke(question))


   return{
      'answer': answer,
      'contexts': retrieved_docs
      }

In [37]:
# Test sample
question = {'question': "What are the steps to install and configure the Tech Innovator Vector Database?"}
answer = call_cross_encoder(question)
print(answer['answer'][:150])

To install and configure the Tech Innovator Vector Database, follow these steps:

1. **Download the Installation Package**: Obtain the installation pa


In [38]:
answer['contexts']

[Document(metadata={'_id': '66d8163a0533e009aa2d4abe', 'pageid': '819215', 'department': 'IT', 'title': 'Tech Innovator Vector Database Installation Guide'}, page_content='steps.Check the log files located in /var/log/vector_db/ for error messages.ConclusionYou have successfully installed and configured the Tech Innovator Vector Database. You can now start using it for')]

# RAG pipeline evaluation

## Test data set prep

In [12]:
import pandas as pd
import json

def json_to_dataframe(json_file_path):
  """Reads a JSON file and converts it to a pandas DataFrame.

  Args:
    json_file_path (str): The path to the JSON file.

  Returns:
    pandas.DataFrame: The DataFrame created from the JSON data.
  """

  with open(json_file_path, 'r') as f:
    data = json.load(f)

  # Handle different JSON structures
  if isinstance(data, list):
    # If the JSON data is a list of dictionaries, create a DataFrame directly
    df = pd.DataFrame(data)
  elif isinstance(data, dict):
    # If the JSON data is a single dictionary, convert it to a list of dictionaries
    df = pd.DataFrame([data])
  else:
    raise ValueError("Unsupported JSON structure")

  return df

In [13]:
def extract_page_content(documents):
    return [doc.page_content for doc in documents]

In [39]:
from from_root import from_root
import os
file_name = "test_dataset_it.json"
json_file_path = os.path.join(from_root(), "data-test/test-dataset/",file_name)
data_to_test = json_to_dataframe(json_file_path)

## RAGAS evaluation

### gptCompressor

In [15]:
# Generate all the answers for the questions in the dataset
gpt_compressor_answers = []
gpt_compressor_contexts = []
for question in data_to_test['question']:
    question_dict = {'question': question}
    answer = call_gptCompressor(question_dict)
    gpt_compressor_contexts.append(answer['contexts'])
    gpt_compressor_answers.append(answer['answer'])

In [16]:
# update the dataset with answers
data_to_test['answers'] = gpt_compressor_answers
data_to_test['contexts'] = gpt_compressor_contexts

In [17]:
# Replace empty list context with ['No context'] if there are any
def is_empty_list(lst):
    return len(lst) == 0
data_to_test['contexts'] = data_to_test['contexts'].apply(lambda x: ['No context'] if is_empty_list(x) else x)

In [18]:
from datasets import Dataset

question = list(data_to_test['question'])
answer = list(data_to_test['answers'])
contexts = list(data_to_test['contexts'].apply(extract_page_content))
ground_truth = list(data_to_test['ground_truth'])

data_gpt_compressor = {
    'question': question,
    'answer': answer,
    'contexts': contexts,
    'ground_truth': ground_truth
}

dataset_gpt_compressor = Dataset.from_dict(data_gpt_compressor)

In [52]:
# # Convert the dictionary to a DataFrame
# df = pd.DataFrame(data_gpt_compressor)

# # Save the DataFrame to a CSV file
# # df.to_csv('data_gpt_compressor_generation.csv', index=False)
# save_dataframe_with_list_column(df, 'data_gpt_compressor_generation.csv')

In [None]:
# Optional, uncomment to trace runs with LangSmith. Sign up here: https://smith.langchain.com.
# from langsmith import Client
# os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
# os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGSMITH_API_KEY"] = os.getenv("LANGSMITH_API_KEY")
# client = Client()

In [20]:
from ragas import evaluate
# from ragas.integrations.langsmith import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)
result = evaluate(
    dataset_gpt_compressor,
    metrics=[
        answer_relevancy,
        faithfulness,
        context_recall,
        context_precision,
    ],
)

Evaluating:   0%|          | 0/36 [00:00<?, ?it/s]

In [21]:
df = result.to_pandas()
df

Unnamed: 0,question,answer,contexts,ground_truth,answer_relevancy,faithfulness,context_recall,context_precision
0,How does the role of the Senior Director respo...,The role of the Senior Director responsible fo...,[the Senior Director responsible for Analytics...,The role of the Senior Director responsible fo...,0.925206,0.3,0.272727,0.8875
1,What is the importance of identifying and addr...,Identifying and addressing growth areas in sel...,[to identify strengths and areas for improveme...,Identifying and addressing growth areas in sel...,0.990183,0.333333,0.333333,0.0
2,What forms of unethical behavior are strictly ...,In the recruitment process at Tech Innovators ...,[Inc. upholds the highest ethical standards in...,Favoritism or nepotism,0.949537,1.0,1.0,0.75
3,What is the significance of emotional and aest...,Emotional and aesthetic labor hold significant...,[LabourEmotional and aesthetic labor involves ...,Emotional and aesthetic labor in the workplace...,1.0,0.772727,1.0,1.0
4,What is the purpose of the orientation session...,The purpose of the orientation session at Tech...,[see you thrive at Tech Innovators Inc. Welcom...,The purpose of the orientation session at Tech...,1.0,0.666667,0.0,0.0
5,What mechanisms are in place for reporting vio...,"At Tech Innovators Inc., employees can report ...",[and identify areas for improvement.5.3 Report...,Employees can report violations of labor laws ...,0.951613,0.111111,0.25,1.0
6,How do employee engagement and disengagement d...,Employee engagement and disengagement differ s...,"[are motivated and committed, disengaged emplo...",Employee engagement and disengagement differ i...,0.960525,0.5,1.0,0.866667
7,What steps are needed to extract data from Con...,To extract data from Confluence and create a R...,[IntroductionThis guide provides a step-by-ste...,To extract data from Confluence and create a R...,0.989648,0.35,0.181818,1.0
8,How does Tech Innovators Inc. promote employee...,Tech Innovators Inc. promotes employee engagem...,[IntroductionTech Innovators Inc. is committed...,Tech Innovators Inc. promotes employee engagem...,0.980843,0.4,0.8,0.25


In [22]:
file_name = "eval_result_test_dataset_it_gpt_compressor.csv"
file_path = os.path.join(from_root(),"data-test/eval-result/", file_name)
df.to_csv(file_path, index=False)

### Cohere rerank

In [28]:
# Generate all the answers for the questions in the dataset
cohere_answers = []
cohere_contexts = []
for question in data_to_test['question']:
    question_dict = {'question': question}
    answer = call_co_rerank(question_dict)
    cohere_contexts.append(answer['contexts'])
    cohere_answers.append(answer['answer'])

In [29]:
# update the dataset with answers
data_to_test['answers'] = cohere_answers
data_to_test['contexts'] = cohere_contexts

In [30]:
# Replace empty list context with ['No context'] if there are any
def is_empty_list(lst):
    return len(lst) == 0
data_to_test['contexts'] = data_to_test['contexts'].apply(lambda x: ['No context'] if is_empty_list(x) else x)

In [31]:
from datasets import Dataset

question = list(data_to_test['question'])
answer = list(data_to_test['answers'])
contexts = list(data_to_test['contexts'].apply(extract_page_content))
ground_truth = list(data_to_test['ground_truth'])

data_cohere_rerank= {
    'question': question,
    'answer': answer,
    'contexts': contexts,
    'ground_truth': ground_truth
}

dataset_cohere_rerank = Dataset.from_dict(data_cohere_rerank)

In [None]:
# Optional, uncomment to trace runs with LangSmith. Sign up here: https://smith.langchain.com.
# from langsmith import Client
# os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
# os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGSMITH_API_KEY"] = os.getenv("LANGSMITH_API_KEY")
# client = Client()

In [32]:
from ragas import evaluate
# from ragas.integrations.langsmith import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)
result = evaluate(
    dataset_cohere_rerank,
    metrics=[
        answer_relevancy,
        faithfulness,
        context_recall,
        context_precision,
    ],
)

result

Evaluating:   0%|          | 0/36 [00:00<?, ?it/s]

{'answer_relevancy': 0.7481, 'faithfulness': 0.2869, 'context_recall': 0.2969, 'context_precision': 0.7778}

In [33]:
df = result.to_pandas()
df

Unnamed: 0,question,answer,contexts,ground_truth,answer_relevancy,faithfulness,context_recall,context_precision
0,How does the role of the Senior Director respo...,The role of the Senior Director responsible fo...,[the Senior Director responsible for Analytics...,The role of the Senior Director responsible fo...,0.925206,0.45,0.222222,1.0
1,What is the importance of identifying and addr...,Identifying and addressing growth areas in sel...,[to identify strengths and areas for improveme...,Identifying and addressing growth areas in sel...,0.990183,0.4,0.333333,0.0
2,What forms of unethical behavior are strictly ...,I don't know.,[Inc. upholds the highest ethical standards in...,Favoritism or nepotism,0.0,0.0,0.0,1.0
3,What is the significance of emotional and aest...,Emotional and aesthetic labor are significant ...,[LabourEmotional and aesthetic labor involves ...,Emotional and aesthetic labor in the workplace...,0.968129,0.565217,0.666667,1.0
4,What is the purpose of the orientation session...,I don't know.,[IntroductionWelcome to Tech Innovators Inc. T...,The purpose of the orientation session at Tech...,0.0,0.0,0.0,0.0
5,What mechanisms are in place for reporting vio...,"At Tech Innovators Inc., employees can report ...",[and identify areas for improvement.5.3 Report...,Employees can report violations of labor laws ...,0.951613,0.111111,0.25,1.0
6,How do employee engagement and disengagement d...,Employee engagement and disengagement differ s...,"[are motivated and committed, disengaged emplo...",Employee engagement and disengagement differ i...,0.920337,0.814815,0.4,1.0
7,What steps are needed to extract data from Con...,To extract data from Confluence and create a R...,[IntroductionThis guide provides a step-by-ste...,To extract data from Confluence and create a R...,0.99666,0.074074,0.0,1.0
8,How does Tech Innovators Inc. promote employee...,Tech Innovators Inc. promotes employee engagem...,[concept that requires a holistic approach. At...,Tech Innovators Inc. promotes employee engagem...,0.980843,0.166667,0.8,1.0


In [34]:
file_name = "eval_result_test_dataset_it_cohere_rerank.csv"
file_path = os.path.join(from_root(),"data-test/eval-result/", file_name)
df.to_csv(file_path, index=False)

### crossEncoderReranker

In [41]:
# Generate all the answers for the questions in the dataset
cer_answers = []
cer_contexts = []
for question in data_to_test['question']:
    question_dict = {'question': question}
    answer = call_cross_encoder(question_dict)
    cer_contexts.append(answer['contexts'])
    cer_answers.append(answer['answer'])

In [49]:
# update the dataset with answers
data_to_test['answer'] = cer_answers
data_to_test['contexts'] = cer_contexts

In [None]:
# Replace empty list context with ['No context'] if there are any
def is_empty_list(lst):
    return len(lst) == 0
data_to_test['contexts'] = data_to_test['contexts'].apply(lambda x: ['No context'] if is_empty_list(x) else x)

In [52]:
from datasets import Dataset

question = list(data_to_test['question'])
answer = list(data_to_test['answer'])
contexts = list(data_to_test['contexts'].apply(extract_page_content))
ground_truth = list(data_to_test['ground_truth'])

data_cer= {
    'question': question,
    'answer': answer,
    'contexts': contexts,
    'ground_truth': ground_truth
}

dataset_cer = Dataset.from_dict(data_cer)

In [None]:
# Optional, uncomment to trace runs with LangSmith. Sign up here: https://smith.langchain.com.
# from langsmith import Client
# os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
# os.environ["LANGCHAIN_TRACING_V2"] = "true"
# os.environ["LANGSMITH_API_KEY"] = os.getenv("LANGSMITH_API_KEY")
# client = Client()

In [54]:
from ragas import evaluate
# from ragas.integrations.langsmith import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)
result = evaluate(
    dataset_cer,
    metrics=[
        answer_relevancy,
        faithfulness,
        context_recall,
        context_precision,
    ],
)

result

Evaluating:   0%|          | 0/36 [00:00<?, ?it/s]

{'answer_relevancy': 0.9664, 'faithfulness': 0.2256, 'context_recall': 0.4040, 'context_precision': 0.7778}

In [55]:
df = result.to_pandas()
df

Unnamed: 0,question,answer,contexts,ground_truth,answer_relevancy,faithfulness,context_recall,context_precision
0,How does the role of the Senior Director respo...,The role of the Senior Director responsible fo...,[the Senior Director responsible for Analytics...,The role of the Senior Director responsible fo...,0.925206,0.090909,0.222222,1.0
1,What is the importance of identifying and addr...,Identifying and addressing growth areas in sel...,[to identify strengths and areas for improveme...,Identifying and addressing growth areas in sel...,0.991927,0.166667,0.333333,0.0
2,What forms of unethical behavior are strictly ...,"In the recruitment process of Inc., unethical ...","[unethical behavior, such as favoritism or nep...",Favoritism or nepotism,0.936252,0.166667,1.0,1.0
3,What is the significance of emotional and aest...,Emotional and aesthetic labor are significant ...,[LabourEmotional and aesthetic labor involves ...,Emotional and aesthetic labor in the workplace...,0.970923,0.352941,0.666667,1.0
4,What is the purpose of the orientation session...,The purpose of the orientation session at Tech...,[to help you get started. Company OverviewTech...,The purpose of the orientation session at Tech...,1.0,0.0,0.0,0.0
5,What mechanisms are in place for reporting vio...,"At Tech Innovators Inc., employees can report ...",[and identify areas for improvement.5.3 Report...,Employees can report violations of labor laws ...,0.942706,0.076923,0.25,1.0
6,How do employee engagement and disengagement d...,Employee engagement and disengagement differ s...,"[are motivated and committed, disengaged emplo...",Employee engagement and disengagement differ i...,0.960525,1.0,0.363636,1.0
7,What steps are needed to extract data from Con...,To extract data from Confluence and create a R...,[IntroductionThis guide provides a step-by-ste...,To extract data from Confluence and create a R...,0.989648,0.0,0.0,1.0
8,How does Tech Innovators Inc. promote employee...,Tech Innovators Inc. promotes employee engagem...,[concept that requires a holistic approach. At...,Tech Innovators Inc. promotes employee engagem...,0.980843,0.176471,0.8,1.0


In [56]:
file_name = "eval_result_test_dataset_it_cer.csv"
file_path = os.path.join(from_root(),"data-test/eval-result/", file_name)
df.to_csv(file_path, index=False)