In [29]:
import os

import numpy as np
import pandas as pd
from datasets import Dataset
from langchain.chains import RetrievalQA
from langchain_community.chat_models import ChatAnyscale
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from ragas import evaluate
from ragas.metrics import faithfulness,context_precision,answer_relevancy,context_recall,context_utilization, answer_correctness

from utils.prompt_template_utils import get_prompt_template
# Credentials are taken from the environment so that no secret has to be typed
# into (and committed with) the notebook: export ANYSCALE_API_KEY and
# OPENAI_API_KEY before starting the kernel.
# A variable that is not set falls back to "" -- the placeholder value this cell
# used before -- so an already-exported key is no longer overwritten with "".
ANYSCALE_API_KEY = os.environ.get("ANYSCALE_API_KEY", "")
os.environ["ANYSCALE_API_BASE"] = "https://api.endpoints.anyscale.com/v1"
os.environ["ANYSCALE_API_KEY"] = ANYSCALE_API_KEY
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

Create the Llama 3 chatbot to be evaluated and the GPT-3.5 model used for ground-truth generation

In [30]:
# Alternative embedding source kept for reference (precomputed BGE-large embeddings):
# final_embeddings = np.load(f"bge_large_embeddings.npy", allow_pickle=True)
# vectorstore = final_embeddings.item()

# NOTE(review): the index folder is an empty string here -- point it at the saved FAISS index.
# NOTE(review): allow_dangerous_deserialization=True opts in to pickle-based loading
# (per the LangChain FAISS docs); only load index files you trust.
vectorstore = FAISS.load_local("", OpenAIEmbeddings(model="text-embedding-3-small"), allow_dangerous_deserialization=True)

# System prompt shared by both chains.
template = """You are a software engineer answering to a senior software engineer who is testing your understanding of the code provided, you will use the provided knowledge to answer questions about the code. Think step by step and respond appropriately. If you can not answer a question based on 
the provided context, inform the user. Do not use any other prior information for answering. Give fully explained answers using the terms used in the code itself. A bulleted format is preferred but not necessary. Do not narrate the conversation, and do not generate your own questions.
"""


ANYSCALE_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
# ANYSCALE_MODEL_NAME = "codellama/CodeLlama-70b-Instruct-hf"
LLM = ChatAnyscale(model_name = ANYSCALE_MODEL_NAME)   # model whose answers are evaluated
LLM2 = ChatOpenAI(model_name="gpt-3.5-turbo-0125")     # model used for the ground truths

# Similarity search returning the 5 best-matching chunks per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":5})

prompt, memory = get_prompt_template(system_prompt=template, promptTemplate_type="llama", history=False)


def _build_retrieval_qa(llm):
    """Build a "stuff" RetrievalQA chain for `llm` over the shared retriever.

    Both chains in this notebook differ only in the chat model, so the common
    configuration lives here instead of being copy-pasted per model.

    Args:
        llm: chat model instance that generates the answer.

    Returns:
        The chain created by RetrievalQA.from_chain_type, configured to also
        return the retrieved source documents.
    """
    # NOTE(review): every chain built here receives the same `prompt` and the
    # same `memory` object -- confirm that sharing `memory` is intended.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",  # try other chains types as well. refine, map_reduce, map_rerank
        retriever=retriever,
        return_source_documents=True, # verbose=True,
        chain_type_kwargs={"prompt": prompt, "memory": memory},
    )


qa_llama = _build_retrieval_qa(LLM)
qa_gpt = _build_retrieval_qa(LLM2)

Create the question and context lists for the RAGAS evaluation

In [31]:
questions = []
contexts = []

# One question per line, with surrounding whitespace stripped. The context
# manager closes the file even if reading raises part-way through.
# file = open("model\sample_questions.txt", 'r')
with open("aksha_questionaire.txt", 'r') as file:
    for line in file:
        questions.append(line.strip())

# For every question, keep the text (page_content) of each retrieved document,
# so contexts[i] is the list of context strings for questions[i].
for ques in questions:
    contexts.append([docs.page_content for docs in retriever.get_relevant_documents(ques)])

To regenerate ground truths using GPT 3.5

In [15]:
# Ask the GPT-backed chain each question in order and keep only the answer
# text ('result') as that question's ground truth.
ground_truths = []
for question in questions:
    gpt_response = qa_gpt.invoke(question)
    ground_truths.append(gpt_response['result'])

To regenerate answers from Llama 3

In [20]:
# Ask the Anyscale-hosted chain each question in order and keep only the
# answer text ('result'); answers[i] corresponds to questions[i].
answers = []
for question in questions:
    llama_response = qa_llama.invoke(question)
    answers.append(llama_response['result'])

In [21]:
# pandas is imported once, with the other imports in the first cell.
# Collect the per-question results in one table (one row per question) and
# save it; the default to_csv settings also write the row index as the first
# column, which the reload cell reads back with index_col=0.
df_aksha = pd.DataFrame({'questions':questions, 'answers':answers,'ground_truths':ground_truths,'contexts': contexts})

df_aksha.to_csv("aksha_answers.csv")

Compiling dataset into dictionary for RAGAS

In [35]:
# pandas is imported once, with the other imports in the first cell.
df = pd.read_csv("aksha_answers.csv", index_col=0)

# The export cell writes only questions/answers/ground_truths/contexts, so the
# 'correct_answer' column must be added to the CSV separately (presumably a
# manual review step -- confirm). Fail early with an actionable message when it
# is missing instead of an opaque KeyError from inside the loop.
if 'correct_answer' not in df.columns:
    raise KeyError(
        "'correct_answer' column not found in aksha_answers.csv; add a "
        "'correct_answer' column (\"yes\" for rows to keep) before running this cell"
    )

# Keep only the rows marked "yes". NOTE: this rebinds questions/answers/
# ground_truths to the filtered subset, while `contexts` keeps its full length.
questions,answers,new_contexts,ground_truths = [],[],[],[]
for index,row in df.iterrows():
    if row['correct_answer'] == "yes":
        questions.append(row['questions'])
        answers.append(row['answers'])
        # `index` is the row label saved in the CSV; it is used as a position
        # into the in-memory `contexts` list built when the questions were loaded.
        new_contexts.append(contexts[index])
        ground_truths.append(row['ground_truths'])

print(questions)
print(len(questions))
print(answers)
print(len(answers))
print(ground_truths)
print(len(ground_truths))
print(len(new_contexts))



['Which database is used in the network?', 'Where is shared volume of this repository stored?', 'Login credentials API is from which cloud base?', 'Which services are responsible to generate alerts?', 'What is the folder structure of folder camera, which is created after cameras are activated', 'Explain code flow in Aksha Pipeline?', 'How many api are there from AWS?', 'How many apis are there in Node?', 'Explain Mongodb schema', 'What is the purpose of the file deletion.py?', 'what does the code in prefilter.py do?', 'Explain the code in surveillance.py', 'Explain the working of the function data_preprocess_object_based_anomaly() in the file data_preprocessor.py']
13
['Based on the provided code, the database used in the network is MongoDB. The connection to the MongoDB database is established using the following line of code:\n\nconn = pymongo.MongoClient("mongodb://mongo:mongo@mongodb:27017/Aksha?authSource=admin&tls=false", directConnection=True)\n\nThis line of code creates a conn

In [36]:
# Column-oriented payload for RAGAS: one list per field, aligned by position.
data = dict(
    question=questions,
    answer=answers,
    contexts=new_contexts,
    ground_truth=ground_truths,
)
dataset = Dataset.from_dict(data)

# Score every sample with the six imported RAGAS metrics.
ragas_results = evaluate(
    dataset=dataset,
    metrics=[context_utilization, faithfulness, answer_relevancy,
             context_precision, context_recall, answer_correctness],
)

# Scores as a DataFrame (this rebinds `df`, previously the reloaded CSV).
df = ragas_results.to_pandas()

Evaluating: 100%|██████████| 78/78 [00:32<00:00,  2.42it/s]


In [37]:
# Save the evaluation table to CSV, then echo it in plain-text form for a
# quick look at the scores.
df.to_csv(path_or_buf="ragas_evauation_for_aksha_llama3_text_embeddings_small_1024_chunk_length.csv")
print(df)

                                             question  \
0              Which database is used in the network?   
1   Where is shared volume of this repository stored?   
2     Login credentials API is from which cloud base?   
3   Which services are responsible to generate ale...   
4   What is the folder structure of folder camera,...   
5                Explain code flow in Aksha Pipeline?   
6                    How many api are there from AWS?   
7                    How many apis are there in Node?   
8                              Explain Mongodb schema   
9        What is the purpose of the file deletion.py?   
10             what does the code in prefilter.py do?   
11                Explain the code in surveillance.py   
12  Explain the working of the function data_prepr...   

                                               answer  \
0   Based on the provided code, the database used ...   
1   The `site_data_dir` function returns the full ...   
2            The login credent