## Effect of Chunk Sizes on RAG

<div class="alert alert-block alert-warning">
This notebook assumes that your OpenAI API key is already set in the <code>OPENAI_API_KEY</code> environment variable. </div>

In [None]:
!pip install llama-index
!pip install llama-index-llms-openai
!pip install pypdf
!pip install spacy

In [None]:
import nest_asyncio

# Patch asyncio so that an already-running event loop can be re-entered.
# NOTE(review): presumably needed because the notebook kernel runs its own
# event loop while llama-index helpers start async work internally - confirm.
nest_asyncio.apply()

from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    ServiceContext,
)
from llama_index.core.evaluation import (
    DatasetGenerator,
    FaithfulnessEvaluator,
    RelevancyEvaluator
)
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings

import time

In [None]:
# Read every supported file found in the 10-K dataset directory.
reader = SimpleDirectoryReader(input_dir="../datasets/10k/")
documents = reader.load_data()

In [None]:
# Keep the experiment small: only the first 20 loaded documents are used,
# both for generating questions and (later) for building the index.
eval_documents = documents[:20]

# Ask the dataset generator for evaluation questions derived from those documents.
data_generator = DatasetGenerator.from_documents(eval_documents)
eval_questions = data_generator.generate_questions_from_nodes(num=5)

# Bare expression so the notebook displays the generated questions.
eval_questions

In [None]:
# GPT-4 (deterministic, temperature 0) is used to judge the responses.
# NOTE(review): Settings.llm is a global default, so components created later
# without an explicit llm presumably pick up GPT-4 as well - confirm this is intended.
Settings.llm = OpenAI(model="gpt-4", temperature=0)

# Evaluators are created without an explicit llm, so they rely on the default set above.
relevancy_gpt4 = RelevancyEvaluator()
faithfulness_gpt4 = FaithfulnessEvaluator()

In [None]:
def evaluate_response_time_and_accuracy(eval_questions):
    """Return average query latency, faithfulness and relevancy over the questions.

    A new ``VectorStoreIndex`` is built from the module-level ``eval_documents``
    on every call, each question is run through its default query engine, and
    every response is scored with the module-level ``faithfulness_gpt4`` and
    ``relevancy_gpt4`` evaluators.

    Args:
        eval_questions: Sized collection of questions to send to the query engine.

    Returns:
        A tuple ``(average_response_time, average_faithfulness,
        average_relevancy)``. The response time is in seconds and covers only
        the query call, not the evaluation calls. The other two values are the
        sum of the evaluators' ``passing`` flags divided by the number of
        questions (assumes ``passing`` is a bool, giving a pass rate in [0, 1]).

    Raises:
        ValueError: If ``eval_questions`` is empty.
    """
    num_questions = len(eval_questions)
    # Fail fast: without this check the index would be built first and the
    # averages below would then fail with ZeroDivisionError.
    if num_questions == 0:
        raise ValueError("eval_questions must contain at least one question")

    vector_index = VectorStoreIndex.from_documents(
        eval_documents,
    )
    query_engine = vector_index.as_query_engine()

    total_response_time = 0.0
    total_faithfulness = 0
    total_relevancy = 0

    for question in eval_questions:
        # perf_counter is monotonic, so the measured interval cannot be skewed
        # by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        response_vector = query_engine.query(question)
        elapsed_time = time.perf_counter() - start_time

        faithfulness_result = faithfulness_gpt4.evaluate_response(
            response=response_vector
        ).passing

        relevancy_result = relevancy_gpt4.evaluate_response(
            query=question, response=response_vector
        ).passing

        total_response_time += elapsed_time
        total_faithfulness += faithfulness_result
        total_relevancy += relevancy_result

    average_response_time = total_response_time / num_questions
    average_faithfulness = total_faithfulness / num_questions
    average_relevancy = total_relevancy / num_questions

    return average_response_time, average_faithfulness, average_relevancy

In [None]:
# Re-run the evaluation once per chunk size; the function rebuilds the index on
# each call, so the new chunking settings are in place before it is built.
for chunk_size in [128, 256, 512, 1024, 2048]:  # different chunk sizes
    Settings.chunk_size = chunk_size
    # Overlap is 25% of the chunk size. Integer division keeps it an int
    # (a token count); chunk_size*0.25 would assign a float.
    Settings.chunk_overlap = chunk_size // 4
    avg_response_time, avg_faithfulness, avg_relevancy = evaluate_response_time_and_accuracy(eval_questions)
    # Adjacent f-strings instead of a backslash continuation inside one string,
    # which embedded the next line's indentation in the printed output.
    print(
        f"Chunk size {chunk_size} - Average Response time: {avg_response_time:.2f}s, "
        f"Average Faithfulness: {avg_faithfulness:.2f}, Average Relevancy: {avg_relevancy:.2f}"
    )