## Building the Testset

In [1]:
# load the documents
from llama_index.core import SimpleDirectoryReader

# Directory with the parsed Bill FAQ files, relative to this notebook.
PARSED_PATH = '../data/Bill_FAQs/parsed'

# Read everything under PARSED_PATH into a list of LlamaIndex documents.
documents = SimpleDirectoryReader(input_dir=PARSED_PATH).load_data()

In [2]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# OpenAI models used for testset generation: `generator_llm` is passed as the
# generator, `critic_llm` as the critic, plus the default OpenAI embedding model.
embeddings = OpenAIEmbedding()
critic_llm = OpenAI(model="gpt-4o")
generator_llm = OpenAI(model="gpt-4o-mini")

# Build the ragas testset generator from the LlamaIndex model objects.
generator = TestsetGenerator.from_llama_index(
    embeddings=embeddings,
    critic_llm=critic_llm,
    generator_llm=generator_llm,
)

In [3]:
# Generate a 20-question testset from the loaded documents with a
# 60% simple / 20% reasoning / 20% multi-context evolution mix.
testset = generator.generate_with_llamaindex_docs(
    documents,
    distributions={simple: 0.6, reasoning: 0.2, multi_context: 0.2},
    test_size=20,
)

embedding nodes:   0%|          | 0/106 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/20 [00:00<?, ?it/s]

Retrying llama_index.llms.openai.base.OpenAI._achat in 0.618060470326046 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-4oFYp4ibpfv2UUvqxZBXzdKT on tokens per min (TPM): Limit 30000, Used 29323, Requested 1604. Please try again in 1.854s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.3821798080942256 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-4oFYp4ibpfv2UUvqxZBXzdKT on tokens per min (TPM): Limit 30000, Used 29314, Requested 1603. Please try again in 1.834s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.006499672667239054 seconds a

In [4]:
# Tabular view of the generated testset. head(30) is larger than the requested
# test_size of 20, so this should display the whole testset rather than a sample.
df = testset.to_pandas()
df.head(30)

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What should you do if your refund check is dat...,"[My refund check is dated over 90 days, what d...","If your refund check is dated over 90 days, yo...",simple,[{'file_path': '/Users/annabeketova/Yandex.Dis...,True
1,What is the true-up process and how does it af...,[\n\nAnnual gas and electric rate change\n\n ...,The true-up process is an annual procedure aut...,simple,[{'file_path': '/Users/annabeketova/Yandex.Dis...,True
2,What factors contribute to increased energy us...,[Why are my winter bills higher than previous ...,Factors contributing to increased energy use d...,simple,[{'file_path': '/Users/annabeketova/Yandex.Dis...,True
3,What options are available for viewing bill in...,[How can I see bill inserts if I receive paper...,"If you receive paperless bills, you can view c...",simple,[{'file_path': '/Users/annabeketova/Yandex.Dis...,True
4,What charges are included in the annual gas an...,[\n\nAnnual gas and electric rate change\n\n ...,The charges included in the annual gas and ele...,simple,[{'file_path': '/Users/annabeketova/Yandex.Dis...,True
5,What can I do to quickly analyze my electric b...,[Why is my electric bill so high?\nYour electr...,"To quickly analyze your electric bill, you can...",simple,[{'file_path': '/Users/annabeketova/Yandex.Dis...,True
6,What could cause a customer to receive a bill ...,[Why did I receive another person's bill?\nThe...,A customer could receive a bill under the prev...,simple,[{'file_path': '/Users/annabeketova/Yandex.Dis...,True
7,What steps should individuals take to file a c...,[What steps do I need to take to file a claim ...,Individuals whose homes were destroyed by the ...,simple,[{'file_path': '/Users/annabeketova/Yandex.Dis...,True
8,What options are available for spreading payme...,[Will I be required to pay my bill all at once...,You can spread your payments over time by visi...,simple,[{'file_path': '/Users/annabeketova/Yandex.Dis...,True
9,What is the importance of maintaining a paymen...,[Do I need to pay my bill if it is estimated?\...,Maintaining a payment schedule for estimated b...,simple,[{'file_path': '/Users/annabeketova/Yandex.Dis...,True


In [5]:
# Persist the generated testset next to the notebook (relative path). With the
# pandas defaults the DataFrame index is written as the first, unnamed column.
df.to_csv("testset_20_questions.csv")

## Building the QueryEngine from every separate document, similarity_top_k = 2 (default)

In [6]:
# Where the persisted Chroma store lives on disk (relative to this notebook)
# and which collection inside it the next cell opens.
CHROMA_DB_PERSISTENT_PATH = '../data/chroma_db'
CHROMA_DB_COLLECTION_NAME = "bills_faqs"

In [7]:
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Open the on-disk Chroma store.
db = chromadb.PersistentClient(path=CHROMA_DB_PERSISTENT_PATH)

# Fetch the collection that holds the stored vectors.
chroma_collection = db.get_or_create_collection(CHROMA_DB_COLLECTION_NAME)

# get_or_create_collection silently creates an EMPTY collection when the path
# or the name is wrong; fail fast instead of evaluating against an empty index.
if chroma_collection.count() == 0:
    raise ValueError(
        f"Chroma collection {CHROMA_DB_COLLECTION_NAME!r} at "
        f"{CHROMA_DB_PERSISTENT_PATH!r} is empty - build the index before evaluating."
    )

# Expose the Chroma collection to LlamaIndex as a vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index object on top of the already-stored vectors.
index = VectorStoreIndex.from_vector_store(
    vector_store, storage_context=storage_context
)

# Query engine for this experiment. similarity_top_k=2 is the LlamaIndex default;
# it is spelled out so the setting matches the "sim_2" results file and the
# later sections that vary this parameter.
query_engine = index.as_query_engine(similarity_top_k=2)

In [8]:
# Show the first generated question (row label 0 of the testset frame).
df = testset.to_pandas()
df.loc[0, "question"]

'What should you do if your refund check is dated over 90 days and you need to request a replacement?'

In [9]:
# Ask the query engine the first test question and print its answer.
response_vector = query_engine.query(df.loc[0, "question"])
print(response_vector)

Contact the Customer Service Department at the provided phone number based on whether you are a residential or business customer. They will assist you in requesting a replacement for your refund check.


## Evaluating the QueryEngine

In [10]:
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
from ragas.metrics.critique import harmfulness

# Metrics handed to ragas `evaluate` in every experiment below:
# the four RAG metrics plus the harmfulness critique.
metrics = [faithfulness, answer_relevancy, context_precision, context_recall, harmfulness]

In [11]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# LLM passed as `llm=` to ragas `evaluate` in the following cells.
evaluator_llm = OpenAI(model="gpt-4o-mini")

In [12]:
# Convert the testset to a Hugging Face dataset, then to a plain dict;
# `ds_dict` is what the `evaluate` cells below receive as `dataset=`.
ds = testset.to_dataset()
ds_dict = ds.to_dict()

# A notebook cell only renders its last expression, so a bare
# `ds_dict["question"]` placed before this line would be evaluated and thrown
# away. Only the reference answers are shown here.
ds_dict["ground_truth"]

['If your refund check is dated over 90 days, you should contact the Customer Service Department, and one of the Customer Service Representatives will assist you in requesting a replacement.',
 'The true-up process is an annual procedure authorized by the California Public Utilities Commission that results in changes to customer gas and electric rates. For the year 2023, as part of this true-up, average residential non-CARE gas bills decreased by 4.6 percent, while average residential non-CARE electric bills increased by approximately 3.4 percent. The rate changes include charges for gas and electric delivery, electricity supply, and state-mandated assistance programs.',
 'Factors contributing to increased energy use during the winter months include colder temperatures leading to higher heating costs, increased natural gas prices, and a rise in energy consumption as customers use more natural gas for heating. Specifically, California has used about 26% more natural gas than the five-ye

In [13]:
from ragas.integrations.llama_index import evaluate

# Run the testset through the current query engine and score the answers
# with the selected metrics, using `evaluator_llm` as the evaluation LLM.
result = evaluate(
    dataset=ds_dict,
    query_engine=query_engine,
    llm=evaluator_llm,
    embeddings=OpenAIEmbedding(),
    metrics=metrics,
)

Running Query Engine:   0%|          | 0/20 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaInd

In [14]:
# Aggregate score per metric for this run (one value per metric name).
print(result)

{'faithfulness': 0.8464, 'answer_relevancy': 0.9402, 'context_precision': 0.9000, 'context_recall': 0.8417, 'harmfulness': 0.1500}


In [15]:
# Per-question breakdown: question, retrieved contexts, answer, ground truth
# and one column per metric. Shows at most the first 20 rows.
result.to_pandas().head(20)

Unnamed: 0,question,contexts,answer,ground_truth,faithfulness,answer_relevancy,context_precision,context_recall,harmfulness
0,What should you do if your refund check is dat...,[Refund checks are valid for 90 days from the ...,Contact the Customer Service Department and sp...,"If your refund check is dated over 90 days, yo...",1.0,0.924009,1.0,1.0,1
1,What is the true-up process and how does it af...,[Annual gas and electric rate change\n\n \n\n...,The true-up process is an annual procedure aut...,The true-up process is an annual procedure aut...,1.0,0.962443,1.0,1.0,0
2,What factors contribute to increased energy us...,[Cold weather can mean higher heating costs to...,Factors that contribute to increased energy us...,Factors contributing to increased energy use d...,1.0,1.0,1.0,1.0,0
3,What options are available for viewing bill in...,[If you receive paperless bills and would like...,You can access current and past bill inserts o...,"If you receive paperless bills, you can view c...",1.0,0.904882,1.0,1.0,1
4,What charges are included in the annual gas an...,[Annual gas and electric rate change\n\n \n\n...,Charges related to electricity supply included...,The charges included in the annual gas and ele...,0.5,0.973613,1.0,1.0,0
5,What can I do to quickly analyze my electric b...,[Your electric bill will fluctuate depending o...,Select Compare my bills for a quick analysis.,"To quickly analyze your electric bill, you can...",1.0,0.918534,1.0,1.0,1
6,What could cause a customer to receive a bill ...,[The most common reason customers receive anot...,"In some cases, customers may receive a bill un...",A customer could receive a bill under the prev...,1.0,0.95804,1.0,1.0,0
7,What steps should individuals take to file a c...,"[For homes destroyed by the Dixie fire, ‌PG&E’...",Individuals should prepare by checking their e...,Individuals whose homes were destroyed by the ...,1.0,0.986801,1.0,1.0,0
8,What options are available for spreading payme...,"[If you are unable to pay your estimated bill,...",Payment Arrangements allow you to spread your ...,You can spread your payments over time by visi...,1.0,0.866482,1.0,0.0,0
9,What is the importance of maintaining a paymen...,"[Yes, you still need to pay your bill if it is...",Maintaining a payment schedule for estimated b...,Maintaining a payment schedule for estimated b...,1.0,0.992843,1.0,1.0,0


In [16]:
# Save the per-question scores of this run to a CSV in the working directory.
result.to_pandas().to_csv("result_evaluation_20_questions_separate_sim_2.csv")

## Building the QueryEngine from 1 document with all questions, similarity_top_k = 2 (default)

In [17]:
# Point at a different persisted Chroma store for this experiment; the
# collection name is the same as before.
CHROMA_DB_PERSISTENT_PATH = '../data/chroma_db_1_source'
CHROMA_DB_COLLECTION_NAME = "bills_faqs"

In [18]:
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Open the on-disk Chroma store.
db = chromadb.PersistentClient(path=CHROMA_DB_PERSISTENT_PATH)

# Fetch the collection that holds the stored vectors.
chroma_collection = db.get_or_create_collection(CHROMA_DB_COLLECTION_NAME)

# get_or_create_collection silently creates an EMPTY collection when the path
# or the name is wrong; fail fast instead of evaluating against an empty index.
if chroma_collection.count() == 0:
    raise ValueError(
        f"Chroma collection {CHROMA_DB_COLLECTION_NAME!r} at "
        f"{CHROMA_DB_PERSISTENT_PATH!r} is empty - build the index before evaluating."
    )

# Expose the Chroma collection to LlamaIndex as a vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index object on top of the already-stored vectors.
index = VectorStoreIndex.from_vector_store(
    vector_store, storage_context=storage_context
)

# Query engine for this experiment. similarity_top_k=2 is the LlamaIndex default;
# it is spelled out so the setting matches the "sim_2" results file and the
# later sections that vary this parameter.
query_engine = index.as_query_engine(similarity_top_k=2)

In [19]:
# Show the first generated question (row label 0 of the testset frame).
df = testset.to_pandas()
df.loc[0, "question"]

'What should you do if your refund check is dated over 90 days and you need to request a replacement?'

In [20]:
# Ask the single-source query engine the first test question and print its answer.
response_vector = query_engine.query(df.loc[0, "question"])
print(response_vector)

Contact the Customer Service Department and speak with one of the Customer Service Representatives to request a replacement if your refund check is dated over 90 days.


In [21]:
from ragas.integrations.llama_index import evaluate

# Score the single-source query engine on the same testset dict and metrics.
result = evaluate(
    dataset=ds_dict,
    query_engine=query_engine,
    llm=evaluator_llm,
    embeddings=OpenAIEmbedding(),
    metrics=metrics,
)

Running Query Engine:   0%|          | 0/20 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaInd

In [24]:
# Aggregate score per metric for the single-source run.
print(result)

{'faithfulness': 0.8464, 'answer_relevancy': 0.9489, 'context_precision': 0.9500, 'context_recall': 0.8000, 'harmfulness': 0.0500}


In [25]:
# Per-question scores for the single-source run (at most the first 20 rows).
result.to_pandas().head(20)

Unnamed: 0,question,contexts,answer,ground_truth,faithfulness,answer_relevancy,context_precision,context_recall,harmfulness
0,What should you do if your refund check is dat...,[2\. Click Edit Profile & Alerts\n\n3\. Scroll...,Contact the Customer Service Department and sp...,"If your refund check is dated over 90 days, yo...",1.0,0.962015,1.0,1.0,1
1,What is the true-up process and how does it af...,[Annual gas and electric rate change\n\n \n\n...,The true-up process is an annual process autho...,The true-up process is an annual procedure aut...,1.0,0.913396,1.0,1.0,0
2,What factors contribute to increased energy us...,"[For more information, visit **LIHEAP. **You m...",Factors such as colder temperatures leading to...,Factors contributing to increased energy use d...,1.0,1.0,1.0,1.0,0
3,What options are available for viewing bill in...,[I have not received my bill yet. Where is it?...,You can access current and past bill inserts o...,"If you receive paperless bills, you can view c...",1.0,0.949805,1.0,1.0,0
4,What charges are included in the annual gas an...,[Annual gas and electric rate change\n\n \n\n...,Charges related to electricity supply included...,The charges included in the annual gas and ele...,1.0,0.93713,1.0,1.0,0
5,What can I do to quickly analyze my electric b...,[This can be done online by signing into Your ...,You can quickly analyze your electric bill by ...,"To quickly analyze your electric bill, you can...",1.0,0.923497,1.0,0.0,0
6,What could cause a customer to receive a bill ...,[Why did I get another bill after I stopped se...,"A crossed meter situation, where the billing s...",A customer could receive a bill under the prev...,0.666667,0.973137,1.0,0.0,0
7,What steps should individuals take to file a c...,[Steps to submit a claim and the required docu...,Prepare to make your claim by checking eligibi...,Individuals whose homes were destroyed by the ...,1.0,0.95736,1.0,1.0,0
8,What options are available for spreading payme...,[Offering [financial assistance\nprograms](htt...,You can spread your payments over time by visi...,You can spread your payments over time by visi...,1.0,0.948509,1.0,1.0,0
9,What is the importance of maintaining a paymen...,[Why did I get another bill after I stopped se...,Maintaining a payment schedule for estimated b...,Maintaining a payment schedule for estimated b...,0.833333,0.992843,1.0,1.0,0


In [26]:
# Save the per-question scores of the single-source run to a CSV in the working directory.
result.to_pandas().to_csv("result_evaluation_20_questions_together_sim_2.csv")

## Building the QueryEngine from 1 document with all questions, similarity_top_k = 3

In [27]:
# Same store and collection as the previous section (values unchanged).
CHROMA_DB_PERSISTENT_PATH = '../data/chroma_db_1_source'
CHROMA_DB_COLLECTION_NAME = "bills_faqs"

In [28]:
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Open the on-disk Chroma store.
db = chromadb.PersistentClient(path=CHROMA_DB_PERSISTENT_PATH)

# Fetch the collection that holds the stored vectors.
chroma_collection = db.get_or_create_collection(CHROMA_DB_COLLECTION_NAME)

# get_or_create_collection silently creates an EMPTY collection when the path
# or the name is wrong; fail fast instead of evaluating against an empty index.
if chroma_collection.count() == 0:
    raise ValueError(
        f"Chroma collection {CHROMA_DB_COLLECTION_NAME!r} at "
        f"{CHROMA_DB_PERSISTENT_PATH!r} is empty - build the index before evaluating."
    )

# Expose the Chroma collection to LlamaIndex as a vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index object on top of the already-stored vectors.
index = VectorStoreIndex.from_vector_store(
    vector_store, storage_context=storage_context
)

# Query engine for this experiment: retrieve the 3 most similar chunks per query.
query_engine = index.as_query_engine(similarity_top_k=3)

In [29]:
# Show the first generated question (row label 0 of the testset frame).
df = testset.to_pandas()
df.loc[0, "question"]

'What should you do if your refund check is dated over 90 days and you need to request a replacement?'

In [30]:
# Ask the top_k=3 query engine the first test question and print its answer.
response_vector = query_engine.query(df.loc[0, "question"])
print(response_vector)

Contact the Customer Service Department at the provided phone number to request a replacement if your refund check is dated over 90 days.


In [31]:
from ragas.integrations.llama_index import evaluate

# Score the top_k=3 query engine on the same testset dict and metrics.
result = evaluate(
    dataset=ds_dict,
    query_engine=query_engine,
    llm=evaluator_llm,
    embeddings=OpenAIEmbedding(),
    metrics=metrics,
)

Running Query Engine:   0%|          | 0/20 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.20571611278535162 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-4oFYp4ib

In [32]:
# Aggregate score per metric for the similarity_top_k=3 run.
print(result)

{'faithfulness': 0.9138, 'answer_relevancy': 0.9460, 'context_precision': 0.9167, 'context_recall': 0.8750, 'harmfulness': 0.1500}


In [33]:
# Per-question scores for the similarity_top_k=3 run (at most the first 20 rows).
result.to_pandas().head(20)

Unnamed: 0,question,contexts,answer,ground_truth,faithfulness,answer_relevancy,context_precision,context_recall,harmfulness
0,What should you do if your refund check is dat...,[2\. Click Edit Profile & Alerts\n\n3\. Scroll...,Contact the Customer Service Department at the...,"If your refund check is dated over 90 days, yo...",1.0,0.949998,1.0,1.0,0
1,What is the true-up process and how does it af...,[Annual gas and electric rate change\n\n \n\n...,The true-up process is an annual process autho...,The true-up process is an annual procedure aut...,1.0,0.913408,0.833333,1.0,0
2,What factors contribute to increased energy us...,"[For more information, visit **LIHEAP. **You m...",Higher natural gas prices and lower temperatur...,Factors contributing to increased energy use d...,1.0,0.932392,1.0,1.0,0
3,What options are available for viewing bill in...,[I have not received my bill yet. Where is it?...,You can access current and past bill inserts o...,"If you receive paperless bills, you can view c...",1.0,0.949805,1.0,1.0,0
4,What charges are included in the annual gas an...,[Annual gas and electric rate change\n\n \n\n...,Charges related to electricity supply included...,The charges included in the annual gas and ele...,1.0,0.940093,1.0,1.0,0
5,What can I do to quickly analyze my electric b...,[This can be done online by signing into Your ...,You can quickly analyze your electric bill by ...,"To quickly analyze your electric bill, you can...",1.0,0.923497,0.833333,0.0,1
6,What could cause a customer to receive a bill ...,[Why did I get another bill after I stopped se...,"A crossed meter issue, where the billing syste...",A customer could receive a bill under the prev...,0.5,0.973094,0.833333,0.0,0
7,What steps should individuals take to file a c...,[Steps to submit a claim and the required docu...,Prepare to make a claim by checking eligibilit...,Individuals whose homes were destroyed by the ...,1.0,0.975664,1.0,1.0,0
8,What options are available for spreading payme...,[Offering [financial assistance\nprograms](htt...,You can spread your payments over time for you...,You can spread your payments over time by visi...,1.0,0.969226,1.0,1.0,1
9,What is the importance of maintaining a paymen...,[Why did I get another bill after I stopped se...,Maintaining a payment schedule for estimated b...,Maintaining a payment schedule for estimated b...,1.0,0.992843,1.0,1.0,0


In [34]:
# Save the per-question scores of the similarity_top_k=3 run to a CSV in the working directory.
result.to_pandas().to_csv("result_evaluation_20_questions_together_sim_3.csv")

## Building the QueryEngine from 1 document with all questions, similarity_top_k = 4

In [35]:
# Same store and collection as the previous section (values unchanged).
CHROMA_DB_PERSISTENT_PATH = '../data/chroma_db_1_source'
CHROMA_DB_COLLECTION_NAME = "bills_faqs"

In [36]:
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Open the on-disk Chroma store.
db = chromadb.PersistentClient(path=CHROMA_DB_PERSISTENT_PATH)

# Fetch the collection that holds the stored vectors.
chroma_collection = db.get_or_create_collection(CHROMA_DB_COLLECTION_NAME)

# get_or_create_collection silently creates an EMPTY collection when the path
# or the name is wrong; fail fast instead of evaluating against an empty index.
if chroma_collection.count() == 0:
    raise ValueError(
        f"Chroma collection {CHROMA_DB_COLLECTION_NAME!r} at "
        f"{CHROMA_DB_PERSISTENT_PATH!r} is empty - build the index before evaluating."
    )

# Expose the Chroma collection to LlamaIndex as a vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index object on top of the already-stored vectors.
index = VectorStoreIndex.from_vector_store(
    vector_store, storage_context=storage_context
)

# Query engine for this experiment: retrieve the 4 most similar chunks per query.
query_engine = index.as_query_engine(similarity_top_k=4)

In [37]:
# Show the first generated question (row label 0 of the testset frame).
df = testset.to_pandas()
df.loc[0, "question"]

'What should you do if your refund check is dated over 90 days and you need to request a replacement?'

In [38]:
# Ask the top_k=4 query engine the first test question and print its answer.
response_vector = query_engine.query(df.loc[0, "question"])
print(response_vector)

Contact the Customer Service Department at the provided phone number based on whether you are a residential or business customer. They will assist you in requesting a replacement for your refund check if it is dated over 90 days.


In [39]:
from ragas.integrations.llama_index import evaluate

# Score the top_k=4 query engine on the same testset dict and metrics.
result = evaluate(
    dataset=ds_dict,
    query_engine=query_engine,
    llm=evaluator_llm,
    embeddings=OpenAIEmbedding(),
    metrics=metrics,
)

Running Query Engine:   0%|          | 0/20 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
No statements were generated from the answer.
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.26927797578213086 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-4oFYp4ibpfv2UUvqxZ

In [40]:
# Aggregate score per metric for the similarity_top_k=4 run.
print(result)

{'faithfulness': 0.8543, 'answer_relevancy': 0.9560, 'context_precision': 0.9056, 'context_recall': 0.8750, 'harmfulness': 0.1000}


In [41]:
# Per-question scores for the similarity_top_k=4 run (at most the first 20 rows).
result.to_pandas().head(20)

Unnamed: 0,question,contexts,answer,ground_truth,faithfulness,answer_relevancy,context_precision,context_recall,harmfulness
0,What should you do if your refund check is dat...,[2\. Click Edit Profile & Alerts\n\n3\. Scroll...,Contact the Customer Service Department at the...,"If your refund check is dated over 90 days, yo...",1.0,0.962015,0.916667,1.0,0
1,What is the true-up process and how does it af...,[Annual gas and electric rate change\n\n \n\n...,The true-up process is an annual process autho...,The true-up process is an annual procedure aut...,0.909091,1.0,0.805556,1.0,0
2,What factors contribute to increased energy us...,"[For more information, visit **LIHEAP. **You m...",Factors that contribute to increased energy us...,Factors contributing to increased energy use d...,1.0,1.0,1.0,1.0,0
3,What options are available for viewing bill in...,[I have not received my bill yet. Where is it?...,You can access current and past bill inserts o...,"If you receive paperless bills, you can view c...",1.0,0.949793,1.0,1.0,1
4,What charges are included in the annual gas an...,[Annual gas and electric rate change\n\n \n\n...,Charges related to electricity supply included...,The charges included in the annual gas and ele...,1.0,0.973613,1.0,1.0,0
5,What can I do to quickly analyze my electric b...,[This can be done online by signing into Your ...,You can quickly analyze your electric bill by ...,"To quickly analyze your electric bill, you can...",1.0,0.952492,0.833333,0.0,1
6,What could cause a customer to receive a bill ...,[Why did I get another bill after I stopped se...,"A crossed meter issue, where the billing syste...",A customer could receive a bill under the prev...,0.5,0.973137,0.833333,0.0,0
7,What steps should individuals take to file a c...,[Steps to submit a claim and the required docu...,Prepare to make your claim by checking eligibi...,Individuals whose homes were destroyed by the ...,1.0,0.975591,1.0,1.0,0
8,What options are available for spreading payme...,[Offering [financial assistance\nprograms](htt...,You can spread your payments over time for you...,You can spread your payments over time by visi...,1.0,0.969226,1.0,1.0,0
9,What is the importance of maintaining a paymen...,[Why did I get another bill after I stopped se...,Maintaining a payment schedule for estimated b...,Maintaining a payment schedule for estimated b...,0.666667,0.992843,1.0,1.0,0


In [42]:
# Save the per-question scores of the similarity_top_k=4 run to a CSV in the working directory.
result.to_pandas().to_csv("result_evaluation_20_questions_together_sim_4.csv")

## Building the QueryEngine from 1 document with all questions, similarity_top_k = 2, Evaluator gpt-3.5-turbo

In [43]:
# Same store and collection as the previous section (values unchanged).
CHROMA_DB_PERSISTENT_PATH = '../data/chroma_db_1_source'
CHROMA_DB_COLLECTION_NAME = "bills_faqs"

In [45]:
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Open the on-disk Chroma store.
db = chromadb.PersistentClient(path=CHROMA_DB_PERSISTENT_PATH)

# Fetch the collection that holds the stored vectors.
chroma_collection = db.get_or_create_collection(CHROMA_DB_COLLECTION_NAME)

# get_or_create_collection silently creates an EMPTY collection when the path
# or the name is wrong; fail fast instead of evaluating against an empty index.
if chroma_collection.count() == 0:
    raise ValueError(
        f"Chroma collection {CHROMA_DB_COLLECTION_NAME!r} at "
        f"{CHROMA_DB_PERSISTENT_PATH!r} is empty - build the index before evaluating."
    )

# Expose the Chroma collection to LlamaIndex as a vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index object on top of the already-stored vectors.
index = VectorStoreIndex.from_vector_store(
    vector_store, storage_context=storage_context
)

# Query engine for this experiment: retrieve the 2 most similar chunks per query.
query_engine = index.as_query_engine(similarity_top_k=2)

In [46]:
# Switch the LLM passed as `llm=` to ragas `evaluate` to gpt-3.5-turbo-0125 for this run.
evaluator_llm = OpenAI(model="gpt-3.5-turbo-0125")

In [47]:
from ragas.integrations.llama_index import evaluate

# Score the top_k=2 query engine again, this time with the gpt-3.5-turbo evaluator LLM.
result = evaluate(
    dataset=ds_dict,
    query_engine=query_engine,
    llm=evaluator_llm,
    embeddings=OpenAIEmbedding(),
    metrics=metrics,
)

Running Query Engine:   0%|          | 0/20 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
No statements were generated from the answer.
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
No statements were generated from the answer.
n values gre

In [48]:
# Aggregate score per metric for the run evaluated with gpt-3.5-turbo-0125.
print(result)

{'faithfulness': 0.5324, 'answer_relevancy': 0.9141, 'context_precision': 0.9000, 'context_recall': 0.7750, 'harmfulness': 0.0000}


In [49]:
# Per-question scores for the gpt-3.5-turbo-0125 evaluator run (at most the first 20 rows).
result.to_pandas().head(20)

Unnamed: 0,question,contexts,answer,ground_truth,faithfulness,answer_relevancy,context_precision,context_recall,harmfulness
0,What should you do if your refund check is dat...,[2\. Click Edit Profile & Alerts\n\n3\. Scroll...,Contact the Customer Service Department and sp...,"If your refund check is dated over 90 days, yo...",1.0,0.870933,1.0,1.0,0
1,What is the true-up process and how does it af...,[Annual gas and electric rate change\n\n \n\n...,The true-up process is an annual process autho...,The true-up process is an annual procedure aut...,1.0,0.921562,1.0,1.0,0
2,What factors contribute to increased energy us...,"[For more information, visit **LIHEAP. **You m...",Higher natural gas prices and lower temperatur...,Factors contributing to increased energy use d...,1.0,0.822423,1.0,1.0,0
3,What options are available for viewing bill in...,[I have not received my bill yet. Where is it?...,You can access current and past bill inserts o...,"If you receive paperless bills, you can view c...",0.5,0.969963,1.0,1.0,0
4,What charges are included in the annual gas an...,[Annual gas and electric rate change\n\n \n\n...,Charges related to electricity supply included...,The charges included in the annual gas and ele...,0.5,0.980041,1.0,1.0,0
5,What can I do to quickly analyze my electric b...,[This can be done online by signing into Your ...,You can quickly analyze your electric bill by ...,"To quickly analyze your electric bill, you can...",0.333333,0.994388,1.0,1.0,0
6,What could cause a customer to receive a bill ...,[Why did I get another bill after I stopped se...,"A crossed meter situation, where the billing s...",A customer could receive a bill under the prev...,0.0,0.954244,1.0,0.0,0
7,What steps should individuals take to file a c...,[Steps to submit a claim and the required docu...,Prepare to make a claim by checking eligibilit...,Individuals whose homes were destroyed by the ...,1.0,0.837761,1.0,1.0,0
8,What options are available for spreading payme...,[Offering [financial assistance\nprograms](htt...,You can spread your payments over time by visi...,You can spread your payments over time by visi...,0.5,0.942308,1.0,1.0,0
9,What is the importance of maintaining a paymen...,[Why did I get another bill after I stopped se...,Maintaining a payment schedule for estimated b...,Maintaining a payment schedule for estimated b...,0.2,0.988442,1.0,1.0,0


In [50]:
# Save the per-question scores of the gpt-3.5-turbo-0125 evaluator run to a CSV in the working directory.
result.to_pandas().to_csv("result_evaluation_20_questions_together_sim_2_gpt35.csv")

## Building the QueryEngine from 1 document with all questions, similarity_top_k = 2, Evaluator gpt-4o

In [51]:
# Same store and collection as the previous section (values unchanged).
CHROMA_DB_PERSISTENT_PATH = '../data/chroma_db_1_source'
CHROMA_DB_COLLECTION_NAME = "bills_faqs"

In [52]:
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Open the on-disk Chroma database located at CHROMA_DB_PERSISTENT_PATH.
db = chromadb.PersistentClient(path=CHROMA_DB_PERSISTENT_PATH)

# Fetch the collection by name.
# NOTE(review): as the method name indicates, a missing collection is created
# instead of raising, so a wrong path or name would presumably produce an empty
# index without any error — verify the collection is populated before evaluating.
chroma_collection = db.get_or_create_collection(CHROMA_DB_COLLECTION_NAME)

# Wrap the Chroma collection as a LlamaIndex vector store and attach it to a
# storage context.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index object on top of the existing vector store; no documents are
# passed in this cell, so the index relies on what the collection already holds.
index = VectorStoreIndex.from_vector_store(
    vector_store, storage_context=storage_context
)

# Query engine used by the evaluation below; similarity_top_k=2 matches the
# "similarity_top_k = 2" setting named in this section's heading.
query_engine = index.as_query_engine(similarity_top_k=2)

In [53]:
# LLM handed to `evaluate(...)` as the judge for this section (gpt-4o, as stated
# in the section heading). `OpenAI` is the LlamaIndex wrapper imported at the top
# of the notebook.
evaluator_llm = OpenAI(model="gpt-4o")

In [54]:
from ragas.integrations.llama_index import evaluate

# Run the query engine over the test set and score its answers with `metrics`,
# using `evaluator_llm` as the judge and OpenAI embeddings.
# `metrics` and `ds_dict` are not defined in this section; they must come from
# earlier cells of the notebook.
# NOTE(review): the recorded output of this cell shows gpt-4o rate-limit (429)
# retries, and the per-question table further down contains NaN scores; the two
# are presumably related — confirm, and consider re-running with lower
# concurrency before trusting the aggregates.
result = evaluate(
    query_engine=query_engine,
    metrics=metrics,
    dataset=ds_dict,
    llm=evaluator_llm,
    embeddings=OpenAIEmbedding(),
)

Running Query Engine:   0%|          | 0/20 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
n values greater than 1 not support for LlamaIndex LLMs
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.342379808703694 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-4oFYp4ibpfv2UUvqxZBXzdKT on tokens per min (TPM): Limit 30000, Used 29644, Requested 1732. Please try again in 2.752s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.5080566454599431 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-4oFYp4ibpfv2UUvqxZBXzdKT on tokens per min (TPM): Limit 30000, Used 29491, Requested 1986. Please try again in 2.954s. Visit https://platform.openai.com/account/rate-limits 

In [55]:
# final scores: one aggregate value per metric for this evaluation run
# NOTE(review): in the recorded run some per-question scores are NaN; the
# aggregates appear to average only the non-NaN rows (e.g. harmfulness
# 0.0526 is about 1/19 for 20 questions) — confirm before comparing runs.
print(result)

{'faithfulness': 0.7089, 'answer_relevancy': 0.9294, 'context_precision': 0.7500, 'context_recall': 0.8571, 'harmfulness': 0.0526}


In [56]:
# Per-question view of the evaluation: convert the result to a DataFrame and
# display up to the first 20 rows (the last expression is rendered by Jupyter).
per_question_scores = result.to_pandas()
per_question_scores.head(20)

Unnamed: 0,question,contexts,answer,ground_truth,faithfulness,answer_relevancy,context_precision,context_recall,harmfulness
0,What should you do if your refund check is dat...,[2\. Click Edit Profile & Alerts\n\n3\. Scroll...,Contact the Customer Service Department and sp...,"If your refund check is dated over 90 days, yo...",1.0,0.962015,1.0,1.0,0.0
1,What is the true-up process and how does it af...,[Annual gas and electric rate change\n\n \n\n...,The true-up process is an annual process autho...,The true-up process is an annual procedure aut...,1.0,0.913408,1.0,1.0,0.0
2,What factors contribute to increased energy us...,"[For more information, visit **LIHEAP. **You m...",Higher natural gas prices and increased energy...,Factors contributing to increased energy use d...,,0.822423,1.0,,
3,What options are available for viewing bill in...,[I have not received my bill yet. Where is it?...,You can access current and past bill inserts o...,"If you receive paperless bills, you can view c...",1.0,0.949805,0.5,1.0,0.0
4,What charges are included in the annual gas an...,[Annual gas and electric rate change\n\n \n\n...,Charges related to electricity supply included...,The charges included in the annual gas and ele...,1.0,0.988603,1.0,1.0,0.0
5,What can I do to quickly analyze my electric b...,[This can be done online by signing into Your ...,You can quickly analyze your electric bill by ...,"To quickly analyze your electric bill, you can...",1.0,,1.0,,0.0
6,What could cause a customer to receive a bill ...,[Why did I get another bill after I stopped se...,"A crossed meter situation, where the billing s...",A customer could receive a bill under the prev...,,0.809304,0.0,0.0,0.0
7,What steps should individuals take to file a c...,[Steps to submit a claim and the required docu...,Prepare to make a claim by checking eligibilit...,Individuals whose homes were destroyed by the ...,,,1.0,1.0,0.0
8,What options are available for spreading payme...,[Offering [financial assistance\nprograms](htt...,You can spread your payments over time by visi...,You can spread your payments over time by visi...,0.5,0.948229,0.5,1.0,0.0
9,What is the importance of maintaining a paymen...,[Why did I get another bill after I stopped se...,Maintaining a payment schedule for estimated b...,Maintaining a payment schedule for estimated b...,,,1.0,1.0,0.0


In [57]:
# Persist the per-question scores of the gpt-4o-evaluated run (similarity_top_k=2)
# so it can be compared with the other evaluation runs later.
scores_sim2_gpt4o = result.to_pandas()
scores_sim2_gpt4o.to_csv("result_evaluation_20_questions_together_sim_2_gpt4o.csv")