In [2]:
%%capture
%pip install llama-index llama-index-embeddings-cohere qdrant-client llama-index-vector-stores-qdrant llama-index-llms-cohere

Note: you should install the following packages to your environment:

`pip install datasets`

`pip install llama-index-embeddings-fastembed`

`pip install llama-index-llms-mistralai`

In [3]:
import os
import sys
from dotenv import load_dotenv
from getpass import getpass
import nest_asyncio
from IPython.display import Markdown, display

nest_asyncio.apply()

load_dotenv()

sys.path.append('../helpers')



In [4]:
CO_API_KEY = os.environ['CO_API_KEY'] or getpass("Enter your Cohere API key: ")

In [None]:
OPENAI_API_KEY = os.environ['OPENAI_API_KEY'] or getpass("Enter your OpenAI API key: ")

In [5]:
QDRANT_URL = os.environ['QDRANT_URL'] or getpass("Enter your Qdrant URL:")

In [6]:
QDRANT_API_KEY = os.environ['QDRANT_API_KEY'] or  getpass("Enter your Qdrant API Key:")

In [7]:
from llama_index.core import StorageContext

from utils import setup_llm, setup_embed_model, setup_vector_store, create_index

#setup_llm(provider="cohere", model="command-r-plus", api_key=CO_API_KEY)

#setup_embed_model(provider="cohere", api_key=CO_API_KEY)

COLLECTION_NAME = "it_can_be_done"

vector_store = setup_vector_store(QDRANT_URL, QDRANT_API_KEY, COLLECTION_NAME)

storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = create_index(from_where="vector_store", vector_store=vector_store, storage_context=storage_context)

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /opt/conda/envs/llama/lib/python3.13/site-
[nltk_data]     packages/llama_index/core/_static/nltk_cache...
[nltk_data]   Package punkt_tab is already up-to-date!
Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.


# Default Prompt Templates

In [42]:
from utils import display_prompt_dict, create_query_engine

In [91]:
query_engine = create_query_engine(
    index=index,
    similarity_top_k=3, 
    mode="query",
    return_sources=True
    )

display_prompt_dict(query_engine.get_prompts())

 **Prompt Key**: response_synthesizer:text_qa_template
**Text:**
```
Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 
```

**Prompt Key**: response_synthesizer:refine_template
**Text:**
```
The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 
```



In [92]:
from llama_index.core.prompts import PromptTemplate

custom_prompt = """You are an assistant for question-answering tasks related to \
motivational poetry. You must reponsd with an original Haiku style poem.

Use the following pieces of retrieved context to answer the user's query:

---------------------\n
{context_str}\n
---------------------\n

Query: {query_str}
"""

custom_prompt_template = PromptTemplate(custom_prompt)

In [93]:
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": custom_prompt_template}
)

In [94]:
display_prompt_dict(query_engine.get_prompts())

 **Prompt Key**: response_synthesizer:text_qa_template
**Text:**
```
You are an assistant for question-answering tasks related to motivational poetry. You must reponsd with an original Haiku style poem.

Use the following pieces of retrieved context to answer the user's query:

---------------------

{context_str}

---------------------


Query: {query_str}

```

**Prompt Key**: response_synthesizer:refine_template
**Text:**
```
The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 
```



In [101]:
from utils import create_query_pipeline

from llama_index.core.query_pipeline import InputComponent

input_component = InputComponent()

chain = [input_component, query_engine]

query_pipeline = create_query_pipeline(chain)

In [102]:
response = query_pipeline.run(input="If you keep your head when all around you are losing their cool and blaming it on you")

[1;3;38;2;155;135;227m> Running module 3c2c3f68-faeb-4727-ac74-54d45b142bbc with input: 
input: If you keep your head when all around you are losing their cool and blaming it on you

[0m

In [103]:
output, intermediates = query_pipeline.run_with_intermediates(input="If you keep your head when all around you are losing their cool and blaming it on you")

[1;3;38;2;155;135;227m> Running module 3c2c3f68-faeb-4727-ac74-54d45b142bbc with input: 
input: If you keep your head when all around you are losing their cool and blaming it on you

[0m

In [104]:
output.__dict__

{'response': "Keep your head held high,\nBlame and chaos surround you,\nStay calm, it's your shield.",
 'source_nodes': [NodeWithScore(node=TextNode(id_='9ce1f065-6287-41d8-9179-13390b67bbdb', embedding=None, metadata={'file_path': '../02_Fundamental_Concepts_in_LlamaIndex/data/pg10763.txt', 'file_name': 'pg10763.txt', 'file_type': 'text/plain', 'file_size': 405245, 'creation_date': '2025-02-02', 'last_modified_date': '2024-11-05'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='../02_Fundamental_Concepts_in_LlamaIndex/data/pg10763.txt', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '../02_Fundamental_Concepts_in_LlamaIndex/data/pg10763.txt', 'file_name': 'pg10763.txt', 'file_type': 'text/pl

In [105]:
intermediates

{'3c2c3f68-faeb-4727-ac74-54d45b142bbc': ComponentIntermediates(inputs={'input': 'If you keep your head when all around you are losing their cool and blaming it on you'}, outputs={'output': Response(response="Keep your head held high,\nBlame and chaos surround you,\nStay calm, it's your shield.", source_nodes=[NodeWithScore(node=TextNode(id_='9ce1f065-6287-41d8-9179-13390b67bbdb', embedding=None, metadata={'file_path': '../02_Fundamental_Concepts_in_LlamaIndex/data/pg10763.txt', 'file_name': 'pg10763.txt', 'file_type': 'text/plain', 'file_size': 405245, 'creation_date': '2025-02-02', 'last_modified_date': '2024-11-05'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='../02_Fundamental_Concepts_in_LlamaIndex

In [106]:
print(response)

Keep your head, stay calm,
Blame is not yours to bear,
Cool heads will prevail.


# Response Synthesizers

The Llama Index [documentation](https://docs.llamaindex.ai/en/stable/module_guides/querying/response_synthesizers/) has a lot of detail regarding each of the response sythensizers. 

In [82]:
from llama_index.core.response_synthesizers import ResponseMode

In [83]:
dir(ResponseMode)

['ACCUMULATE',
 'COMPACT',
 'COMPACT_ACCUMULATE',
 'CONTEXT_ONLY',
 'GENERATION',
 'NO_TEXT',
 'REFINE',
 'SIMPLE_SUMMARIZE',
 'TREE_SUMMARIZE',
 '__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__members__',
 '__mod__',
 '__module__',
 '__mul__',
 '__name__',
 '__ne__',
 '__new__',
 '__qualname__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'l

# Response modes

In LlamaIndex, [response modes](https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/response_modes/) are used to determine how the system should process and return the results of a query.  Each response mode is designed to handle different types of queries and use cases, providing flexibility and customization in how you interact with your data.



#### ⚗️ Refine 

Refine is an iterative method to generate a response. 

Initially, we use the context in the first node and the query to create a basic answer. Then, we refine this answer by inputting it, along with the query and context of the second node, into a "refine prompt" to generate an improved answer. 

This refinement process continues through N-1 nodes, with N being the total number of nodes. It makes a separate LLM call per Node/retrieved chunk. This mode is good for generating more detailed answers.

#### 🤏 Compact

Compact and refine mode first combine text chunks into larger consolidated chunks that more fully utilize the available context window, then refine answers across them. This mode is faster than refine since we make fewer calls to the LLM. 

This mode is useful when you want to reduce the number of LLM calls while still refining the answer.


#### 📝 Simple summarize

Merge all text chunks into one and make a large language model call. The call will fail if the merged text chunk exceeds the context window size.

It's good for quick summarization purposes, but may lose detail due to truncation.

#### 🌴 Tree summarize

Construct a tree index for the candidate nodes in a bottom-up manner then use a summary prompt based on the query. Return the root node as the final response. When this mode is set, the system is instructed to iterate through many, if not all, documents in order to synthesize an answer, which can lead to better summarization results. 

This mode is particularly useful for summarization queries, where the goal is to provide a comprehensive summary of a collection of text or a specific topic.

#### 🤖 Generation

Ignore context, just use LLM to generate a response.

#### ❌ No text

This mode only runs the retriever to fetch the nodes that would have been sent to the LLM, without actually synthesizing a final response. The nodes can then be inspected by checking `response.source_nodes`.

#### 📏 Accumulate

This mode applies the query to each text chunk while accumulating the responses into an array. It returns a concatenated string of all responses. 

This mode is good for when you need to run the same query separately against each text chunk.

#### Compact accumulate

In the compact and accumulate mode, text chunks are combined into larger chunks to utilize the context window better. Answers are then accumulated for each chunk and returned as a concatenation. This mode is faster than accumulate as it reduces calls to the LLM.

In [85]:
from llama_index.core import get_response_synthesizer

response_synthesizer = get_response_synthesizer(response_mode="compact")

query_engine = create_query_engine(
    index,
    mode="query",
    response_synthesizer = response_synthesizer
    )

input_component = InputComponent()

chain = [input_component, query_engine]

query_pipeline = create_query_pipeline(chain)

query_pipeline.run(input="What do the poems teach about one should think about success and failure?")

[1;3;38;2;155;135;227m> Running module 6aa44991-3e07-4bba-a81b-6f3a4b0b8ac8 with input: 
input: What do the poems teach about one should think about success and failure?

[0m[1;3;38;2;155;135;227m> Running module a9633f85-46fd-4b2f-9b25-135cc51a60eb with input: 
input: What do the poems teach about one should think about success and failure?

[0m

Response(response='The poems teach that it is not failure itself that defines a person, but how they respond to it. It is important to get back up and keep trying, even when faced with setbacks or challenges. Success is not the only thing that matters; the struggle and effort put forth are also valuable. It is important to learn from failures and to keep fighting for what you believe in.', source_nodes=[NodeWithScore(node=TextNode(id_='e33e4bd0-d5b9-482e-813e-8d11709cd8c1', embedding=None, metadata={'file_path': '../02_Fundamental_Concepts_in_LlamaIndex/data/pg10763.txt', 'file_name': 'pg10763.txt', 'file_type': 'text/plain', 'file_size': 405245, 'creation_date': '2025-02-02', 'last_modified_date': '2024-11-05'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<N