# Ragas evaluation
Test batch processing and Ragas capabilities.

Uses this article as a model: https://towardsdatascience.com/visualize-your-rag-data-evaluate-your-retrieval-augmented-generation-system-with-ragas-fc2486308557

Ragas repository: https://github.com/explodinggradients/ragas/tree/main

In [2]:
import os, sys
from ragas.testset import TestsetGenerator
from ragas import RunConfig
from dotenv import load_dotenv,find_dotenv
import chromadb
from chromadb import PersistentClient
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.documents import Document
import pandas as pd
import random

# Import local packages.
# The project's source directory is appended to sys.path first so that the
# `data_processing` import below can resolve. The path is relative, so this
# presumably requires running the notebook from its own directory — verify.
sys.path.append('../src/aerospace_chatbot')
from data_processing import _stable_hash_meta

# Set environment variables from the .env file located by find_dotenv().
# override=True: values from .env replace variables already set in the environment.
load_dotenv(find_dotenv(), override=True)

True

## Connect to database

In [3]:
# Fail fast with a clear message when the database location is not configured:
# os.getenv returns None for an unset variable, and os.path.join(None, ...) would
# otherwise raise an opaque TypeError.
local_db_path = os.getenv('LOCAL_DB_PATH')
if local_db_path is None:
    raise RuntimeError('LOCAL_DB_PATH is not set; define it in .env or in the environment.')

# Open the on-disk Chroma database stored under <LOCAL_DB_PATH>/chromadb.
persistent_client = chromadb.PersistentClient(path=os.path.join(local_db_path, 'chromadb'))
# Embedding model handed to the vectorstore as its embedding function.
query_model = OpenAIEmbeddings(model='text-embedding-ada-002', openai_api_key=os.getenv('OPENAI_API_KEY'))

# Wrap the existing collection as a LangChain vectorstore.
# NOTE(review): the original comment described this collection as unchunked, full PDF
# pages, but the collection name ends in "400chunk" — confirm which is correct.
vectorstore = Chroma(client=persistent_client,
                     collection_name='chromadb-openai-ams-400chunk',
                     embedding_function=query_model)

In [4]:
# Fetch the collection's records in a single call, requesting the metadata,
# the document text and the embeddings for each one.
all_docs = vectorstore.get(
    include=['metadatas', 'documents', 'embeddings'],
)

In [5]:
# Pair each stored text with its metadata and wrap the pair as a LangChain Document.
lcdocs = [
    Document(page_content=text, metadata=meta)
    for text, meta in zip(all_docs['documents'], all_docs['metadatas'])
]

## Generate synthetic dataset

In [5]:
# Model names for the two chat models used during test-set generation.
generator_model = 'gpt-3.5-turbo-16k'
critic_model = 'gpt-3.5-turbo-16k'  # alternative previously tried: "gpt-4"

# Chat model clients built from the names above.
generator_llm = ChatOpenAI(model=generator_model)
critic_llm = ChatOpenAI(model=critic_model)

# Embedding client created with the library's default settings.
embeddings = OpenAIEmbeddings()

In [6]:
# Build the Ragas test-set generator from the generator LLM, the critic LLM
# and the embedding model.
generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

# Fraction of the stored documents to draw as a random cross section.
size=0.005
# Fixed seed so that Restart & Run All draws the same documents every time;
# change the value to obtain a different sample.
sample_seed=42
# int() truncates to 0 when there are fewer than 1/size documents, which would
# hand the generator an empty list; keep at least one document, never more than exist.
sample_size = min(len(lcdocs), max(1, int(len(lcdocs) * size)))

# Get a random sample of lcdocs. A dedicated Random instance keeps the draw
# reproducible without reseeding the module-level generator.
lcdocs_random = random.Random(sample_seed).sample(lcdocs, sample_size)

In [7]:
# Timeout, retry, wait and worker limits passed to Ragas for this run.
run_config = RunConfig(
    timeout=120,
    max_retries=20,
    max_wait=120,
    max_workers=8,
)

# Number of test rows requested from the generator.
n_questions = 5
testset = generator.generate_with_langchain_docs(
    lcdocs_random,
    test_size=n_questions,
    with_debugging_logs=True,
    is_async=False,
    run_config=run_config,
)

# Convert the result to a DataFrame and write it to testset.csv (relative path)
# without the index column.
df_testset = testset.to_pandas()
df_testset.to_csv('testset.csv', index=False)

embedding nodes:   0%|          | 0/62 [00:00<?, ?it/s]

Generating:   0%|          | 0/5 [00:00<?, ?it/s]

[ragas.testset.filters.DEBUG] node filter: {'score': 4.0}
[ragas.testset.evolutions.INFO] retrying evolution: 0 times
[ragas.testset.filters.DEBUG] node filter: {'score': 5.0}
[ragas.testset.evolutions.INFO] retrying evolution: 0 times
[ragas.testset.filters.DEBUG] node filter: {'score': 4.0}
[ragas.testset.evolutions.INFO] retrying evolution: 0 times
[ragas.testset.filters.DEBUG] node filter: {'score': 4.0}
[ragas.testset.evolutions.INFO] retrying evolution: 0 times
[ragas.testset.filters.DEBUG] node filter: {'score': 7.5}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['Mass intensity', 'Moisture inside the test chamber', 'Range of the mass spectrometer', 'Amplification settings', 'Conducted PoD experiments']
[ragas.testset.filters.DEBUG] node filter: {'score': 4.0}
[ragas.testset.evolutions.INFO] retrying evolution: 0 times
[ragas.testset.filters.DEBUG] node filter: {'score': 4.5}
[ragas.testset.evolutions.INFO] retrying evolution: 0 times
[ragas.testset.filters.DEBUG] 

In [10]:
# Display the generated test set (one row per generated question).
df_testset

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What is the impact of pull-out strengths for i...,[Table 2 summarizes the calculation for the to...,The pull-out strengths for inserts in Al-1100 ...,simple,"[{'page': 228, 'source': 'AMS_2018.pdf', 'file...",True
1,What is the advantage of using a complex magne...,[the smallest temperature coefficient is prefe...,The advantage of using a complex magnet config...,simple,"[{'page': 192, 'source': 'AMS_2008.pdf', 'file...",True
2,What were the main sources of shock output tes...,[thepercentcontribution thateachofthethreemain...,The main sources of shock output tested during...,reasoning,"[{'page': 149, 'source': 'AMS_2002.pdf', 'file...",True
3,What is the main component for installing all ...,[490 Mechanism Background \n\nThis deployment ...,The main component for installing all the elem...,multi_context,"[{'page': 504, 'source': 'AMS_2012.pdf', 'file...",True
4,Why are nanocomposite sputter-deposited MoS2 c...,[221 Degradation of Sputter -Deposited Nanocom...,Nanocomposite sputter-deposited MoS2 coatings ...,reasoning,"[{'page': 235, 'source': 'AMS_2016.pdf', 'file...",True


### Format dataset and database for RAG

In [11]:
# Reduce every generated test row to the fields used downstream: the question,
# its ground-truth answer, and the name of the model that generated it.
questions_all = [
    {
        "question": qa.question,
        "ground_truth": qa.ground_truth,
        "question_by": generator_model,
    }
    for qa in testset.test_data
]
# Show how many questions were collected; without this trailing expression the
# cell produces no output, so the count recorded under it could not be reproduced.
len(questions_all)

5

In [12]:
# Tabulate the question records; `columns` fixes both the selection and the order.
question_fields = ["question", "ground_truth", "question_by"]
df_questions = pd.DataFrame(questions_all, columns=question_fields)

# Label each row by its position in questions_all and put the label first.
question_ids = [f"Question {position}" for position in range(len(questions_all))]
df_questions.insert(0, "id", question_ids)

# When the same question text appears more than once, keep its first occurrence only.
df_questions = df_questions.drop_duplicates(subset=["question"])
df_questions

Unnamed: 0,id,question,ground_truth,question_by
0,Question 0,What is the impact of pull-out strengths for i...,The pull-out strengths for inserts in Al-1100 ...,gpt-3.5-turbo-16k
1,Question 1,What is the advantage of using a complex magne...,The advantage of using a complex magnet config...,gpt-3.5-turbo-16k
2,Question 2,What were the main sources of shock output tes...,The main sources of shock output tested during...,gpt-3.5-turbo-16k
3,Question 3,What is the main component for installing all ...,The main component for installing all the elem...,gpt-3.5-turbo-16k
4,Question 4,Why are nanocomposite sputter-deposited MoS2 c...,Nanocomposite sputter-deposited MoS2 coatings ...,gpt-3.5-turbo-16k


In [None]:
# Fetch every record again so this cell can be run without relying on the
# `all_docs` value left behind by an earlier cell.
all_docs = vectorstore.get(include=["metadatas", "documents", "embeddings"])

# One row per stored record: a stable id derived from the metadata, the source
# and page taken from the metadata, the text and its embedding.
df_docs = pd.DataFrame(
    {
        "id": [_stable_hash_meta(metadata) for metadata in all_docs["metadatas"]],
        "source": [metadata.get("source") for metadata in all_docs["metadatas"]],
        # -1 marks records whose metadata carries no page number.
        "page": [metadata.get("page", -1) for metadata in all_docs["metadatas"]],
        "document": all_docs["documents"],
        # list(...) yields one vector per row. Newer Chroma clients return the
        # embeddings as a single 2-D NumPy array, which pandas rejects as a column
        # ("Per-column arrays must each be 1-dimensional"); a list of lists is unaffected.
        "embedding": list(all_docs["embeddings"]),
    }
)

## RAG questions/answers