In [25]:
from llama_index.indices.vector_store.base import VectorStoreIndex
from llama_index.readers.file.base import SimpleDirectoryReader

# Needed for running async functions in Jupyter Notebook: the notebook kernel
# already runs an asyncio event loop, and nest_asyncio patches asyncio so that
# loop can be re-entered by code that drives it again.
import nest_asyncio

nest_asyncio.apply()

## Download Data

In [26]:
# Create the target directory (-p: no error if it already exists), then save
# the Paul Graham essay into it; this directory is what the loader reads below.
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'


--2024-01-28 01:52:35--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 75042 (73K) [text/plain]
Saving to: ‘data/paul_graham/paul_graham_essay.txt’


2024-01-28 01:52:35 (71.9 MB/s) - ‘data/paul_graham/paul_graham_essay.txt’ saved [75042/75042]



In [27]:
# The single question that every query engine in this notebook is asked.
query = "What did the author do growing up?"

# Load everything under the download directory and index it for retrieval.
documents = SimpleDirectoryReader(input_dir="./data/paul_graham/").load_data()
index = VectorStoreIndex.from_documents(documents=documents)

In [29]:
# Baseline: the index's default query engine, which the retry engines in the
# later cells wrap.
base_query_engine = index.as_query_engine()
response = base_query_engine.query(query)
# Print the `response` attribute of the result object.
# NOTE(review): `rprint` is not defined in the visible cells — presumably
# imported in an earlier cell (e.g. rich's print); confirm.
rprint(response.response)


## Retry Query Engine
The retry query engine uses an evaluator to improve the response from a base query engine.

It does the following:

1. first queries the base query engine, then

2. uses the evaluator to decide whether the response passes.

3. If the response passes, then return response,

4. Otherwise, transform the original query with the evaluation result (query, response, and feedback) into a new query,

5. Repeats this process up to `max_retries` times.

In [32]:
from llama_index.query_engine import RetryQueryEngine
from llama_index.evaluation import RelevancyEvaluator

# Evaluator that the retry engine consults after each answer.
query_response_evaluator = RelevancyEvaluator()

# Wrap the baseline engine with the evaluator.
retry_query_engine = RetryQueryEngine(
    query_engine=base_query_engine,
    evaluator=query_response_evaluator,
)

retry_response = retry_query_engine.query(query)
rprint(retry_response.response)


## Retry Source Query Engine
The retry source query engine modifies the source nodes used for the query: it filters the existing source nodes based on an LLM evaluation of each node.



In [40]:
from llama_index.query_engine import RetrySourceQueryEngine

# Same baseline engine and evaluator as the plain retry engine, but wrapped in
# the source-node variant.
retry_source_query_engine = RetrySourceQueryEngine(
    query_engine=base_query_engine,
    evaluator=query_response_evaluator,
)

retry_source_response = retry_source_query_engine.query(query)
rprint(retry_source_response.response)

## Retry Guideline Query Engine
This module tries to use guidelines to direct the evaluator’s behavior. You can customize your own guidelines.

In [36]:
from llama_index.evaluation.guideline import (
    GuidelineEvaluator,
    DEFAULT_GUIDELINES,
)
from llama_index.response.schema import Response
from llama_index.indices.query.query_transform.feedback_transform import (
    FeedbackQueryTransformation,
)
from llama_index.query_engine.retry_query_engine import (
    RetryGuidelineQueryEngine,
)

# Guideline evaluator: the library's default guidelines extended with two
# extra rules (illustrative only).
guideline_eval = GuidelineEvaluator(
    guidelines=(
        DEFAULT_GUIDELINES
        + "\nThe response should not be overly long.\n"
        + "The response should try to summarize where possible.\n"
    )
)

Let’s look at what happens under the hood.

In [43]:
# Use `response` directly when it is already a Response; otherwise ask it for
# one via get_response().
typed_response = (
    response if isinstance(response, Response) else response.get_response()
)

# Named `eval_result` rather than `eval` so the built-in eval() is not
# shadowed for the rest of the kernel session.
eval_result = guideline_eval.evaluate_response(query, typed_response)
rprint(f"Guideline eval evaluation result: {eval_result.feedback}")

# Feed the evaluation back into the original question to produce the query
# that a retry would use.
feedback_query_transform = FeedbackQueryTransformation(resynthesize_query=True)
transformed_query = feedback_query_transform.run(
    query, {"evaluation": eval_result}
)
rprint(f"Transformed query: {transformed_query.query_str}")

In [44]:
# Wrap the baseline engine with the guideline evaluator, with query
# resynthesis switched on.
retry_guideline_query_engine = RetryGuidelineQueryEngine(
    query_engine=base_query_engine,
    guideline_evaluator=guideline_eval,
    resynthesize_query=True,
)

retry_guideline_response = retry_guideline_query_engine.query(query)
rprint(retry_guideline_response.response)