In [1]:
import os
from dotenv import load_dotenv
import openai

# Load variables from a local .env file into the process environment, then
# hand the OpenAI key (None when the variable is unset) to the openai client.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [2]:
from llama_index import VectorStoreIndex, get_response_synthesizer
from llama_index.schema import Document


In [3]:
from llama_index import load_index_from_storage
from llama_index import StorageContext, ServiceContext


# Re-open the storage persisted under "research_paper_index" and load the
# index stored there with the id "graph_of_thoughts".
storage_context = StorageContext.from_defaults(
    persist_dir="research_paper_index",
)
index = load_index_from_storage(
    storage_context,
    index_id="graph_of_thoughts",
)

[nltk_data] Downloading package punkt to /tmp/llama_index...
[nltk_data]   Unzipping tokenizers/punkt.zip.


## Data Agents

#### basic function calling

In [4]:
from llama_index.tools import FunctionTool
from llama_index.llms import OpenAI
from llama_index.agent import OpenAIAgent, ReActAgent

In [3]:
#define a simple tool
def multiply(a:int, b:int) -> int:
    """multiply 2 numbers and return the integer result"""
    # The docstring above is kept verbatim: the tool built from this function
    # learns its description from it.
    product = a * b
    return product

# Wrap the function as a tool. Only the function is passed in; the tool
# learns what it does from multiply's docstring.
multiply_tool = FunctionTool.from_defaults(fn=multiply)

In [5]:
# Initialize the LLM that the agents below are built with
# (pinned to the gpt-3.5-turbo-0613 model).
llm = OpenAI(model="gpt-3.5-turbo-0613")

- In the OpenAI agent, tool selection is baked into the model through fine-tuning (it selects tools given their descriptions).
- In the ReAct agent, tool selection is computed with the ReAct process (works with open-source LLMs and GPT-4, but requires more LLM calls to select tools).

In [7]:
# Create the data agents: one ReAct agent and one OpenAI agent, each given
# the same single tool and the same LLM.
react_agent = ReActAgent.from_tools(
    tools=[multiply_tool],
    llm=llm,
    verbose=True,
)
openai_agent = OpenAIAgent.from_tools(
    tools=[multiply_tool],
    llm=llm,
    verbose=True,
)

#### query engines as tools

[use this in conjunction with context retrieval and query planning and routing if needed. Or just plainly if the use case is simple.]

In [8]:
from llama_index.tools import QueryEngineTool, ToolMetadata

In [12]:
# Imagine we had many different indices to query. In this notebook both
# engines are built from the same `index`; they only stand in for two sources.
uber_query_engine = index.as_query_engine(similarity_top_k=3)
lyft_query_engine = index.as_query_engine(similarity_top_k=3)

# One QueryEngineTool per (engine, tool name, company) triple.
query_engine_tools = [
    QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(
            name=tool_name,
            description=(
                f"Provides information about {company} financials for year 2021. "
                "Use a detailed plain text query as input to the tool."
            ),
        ),
    )
    for engine, tool_name, company in (
        (uber_query_engine, "uber_10k", "Uber"),
        (lyft_query_engine, "lyft_10k", "Lyft"),
    )
]

# react_agent = ReActAgent.from_tools(
#     tools=[multiply_tool, *query_engine_tools], 
#     llm=llm, 
#     verbose=True)

#### Use other Agents as tools

In [None]:
# query_engine_tools = [
#     QueryEngineTool(
#         query_engine=sql_agent,
#         metadata=ToolMetadata(
#             name="sql_agent", description="Agent that can execute SQL queries."
#         ),
#     ),
#     QueryEngineTool(
#         query_engine=gmail_agent,
#         metadata=ToolMetadata(
#             name="gmail_agent",
#             description="Tool that can send emails on Gmail.",
#         ),
#     ),
# ]

# outer_agent = ReActAgent.from_tools(query_engine_tools, llm=llm, verbose=True)

#### Retrieving tools from an index; 

[use this for different function/sql/any tool and not for retrieval]

In [14]:
from llama_index.objects import ObjectIndex, SimpleToolNodeMapping

# Put the tools themselves into an index so that an agent can be handed a
# tool retriever instead of a fixed tool list.
tool_mapping = SimpleToolNodeMapping.from_objects(query_engine_tools)
object_index = ObjectIndex.from_objects(
    query_engine_tools,
    tool_mapping,
    VectorStoreIndex
)
# FnRetrieverOpenAIAgent is deprecated:
# from llama_index.agent import FnRetrieverOpenAIAgent
# openai_agent = FnRetrieverOpenAIAgent.from_retriever(
#     object_index.as_retriever(),
#     verbose=True
# )
# so pass the retriever to OpenAIAgent.from_tools instead of calling
# FnRetrieverOpenAIAgent.from_retriever.
# Note: this rebinds `openai_agent`, replacing the agent created earlier.
openai_agent = OpenAIAgent.from_tools(tool_retriever=object_index.as_retriever(), verbose=True)

#### Context Retrieval Agent

Perform retrieval before calling tools --> pick better tools

[can be used with action function tools or with query engine tools]

In [32]:
from llama_index.agent import ContextRetrieverOpenAIAgent

# Store a list of abbreviations. Each one becomes its own document, so the
# retriever can return just the definition that matches a question.
texts = [
    "Abbreviation: X = Revenue",
    "Abbreviation: YZ = Risk Factors",
    "Abbreviation: Z = Costs",
]
abbreviation_docs = [Document(text=t) for t in texts]
context_index = VectorStoreIndex.from_documents(abbreviation_docs)

# Add the context agent. The tool list used to be
# `[march2023, feb2023, jan2023, dec2022]`, but none of those names is defined
# anywhere in this notebook (NameError on a fresh kernel), so the query-engine
# tools built earlier are passed instead.
context_agent = ContextRetrieverOpenAIAgent.from_tools_and_retriever(
    query_engine_tools,
    context_index.as_retriever(similarity_top_k=1),  # fetch the single best-matching abbreviation
    verbose=True
)
## response = context_agent.chat("what is YZ of Uber in 2021 ?") -> the retrieved context tells it what YZ means, and it will know which tool to look in

#### Query Planning

- infers a pydantic schema for complicated query plan
- can be used in conjunction with context retrieval agent for more complicated situations.

In [35]:
from llama_index.tools import QueryPlanTool

# Wrap the query-engine tools in a single query-plan tool that uses the
# default response synthesizer.
response_synthesizer = get_response_synthesizer()
query_plan_tool = QueryPlanTool.from_defaults(
    response_synthesizer=response_synthesizer,
    query_engine_tools=query_engine_tools,
)

# Agent that only sees the planning tool; GPT-4 at temperature 0, with at
# most 10 function calls.
openai_agent = OpenAIAgent.from_tools(
    llm=OpenAI(temperature=0, model="gpt-4-0613"),
    tools=[query_plan_tool],
    max_function_calls=10,
)

In [9]:
from pydantic import BaseModel

class User(BaseModel):
    """
    Schema for a user record.

    Building a User from raw data makes Pydantic validate it against the
    declared fields (name must be a string, age an integer, and so on);
    data that does not match the schema raises an error.
    """
    name: str
    age: int
    is_active: bool = True

# Example usage: is_active is left out, so it takes its default value.
user_data = dict(name='Alice', age=30)
user = User(**user_data)
print(user)

name='Alice' age=30 is_active=True


Do all topics in Model Guide

- utility tools - augment the capabilities of existing tools
- check OnDemandToolLoader, and LoadAndSearchToolSpec

#### Tool Specs

In [38]:
# from llama_index import BaseToolSpec
# # class MyToolsSpec()

#### External tools from llamahub

In [47]:
from llama_hub.youtube_transcript import YoutubeTranscriptReader
# Load the transcript of the linked YouTube video; the result is indexed as
# documents in the next cell.
loader = YoutubeTranscriptReader()
transcript_docs = loader.load_data(ytlinks=["https://www.youtube.com/watch?v=FNXIeEIu6LM"])

In [50]:
# Build a vector store index over the transcript documents.
sadhguru_index = VectorStoreIndex.from_documents(transcript_docs)

In [57]:
# Query engine over the transcript index, using the "tree_summarize" response mode.
sadhguru_query_engine = sadhguru_index.as_query_engine(response_mode="tree_summarize")

In [58]:
# Ask a broad question about the whole talk; the answer text is in response.response.
response = sadhguru_query_engine.query("what is sadhguru trying to tell ?")

In [63]:
from pprint import pprint
# Pretty-print the answer string so the long text is wrapped over several lines.
pprint(response.response)

('Sadhguru is trying to convey the idea that instead of trying to release or '
 'suppress emotions such as anger, lust, or hatred, one should learn to '
 'transform and redirect that energy in the right direction. He emphasizes the '
 'importance of not killing the energy but rather finding the right pathway '
 "for it. Sadhguru also mentions that increasing one's energy levels through "
 'practices and methods can help in this process. Additionally, he suggests '
 "that having a guru in one's life can provide guidance on where the energy "
 'should go. Ultimately, the goal is to accumulate enough energy to burst into '
 'a different dimension or a higher state of being.')


## Retriever

In [66]:
# Use the transcript index as a retriever (default settings) and fetch the
# nodes that match the question; no answer is synthesized here.
retriever = sadhguru_index.as_retriever()
nodes = retriever.retrieve("How to avoid anger ?")

In [69]:
# Show the raw text of every retrieved node.
for retrieved_node in nodes:
    print(retrieved_node.text)

[Music]
lust is great energy keep it inside cook
[Music]
yourself don't try to kill that energy
redirect it in the right
direction you have to burst into the
next
[Music]
damage
this is a big difference that you see in
the understanding of Life between east
and west east west has always talked
about
release everything you must release
it your psychological situations you
must find release either in somebody or
something so the Easter way of life is
always don't
release whether it's your anger or your
hatred or your lust whatever keep it
inside cook
yourself if you don't allow it any
release it'll sh
up this is
your don't allow you to release anyway
plug all the
holes if you plug all the holes it will
only go up so this is the way
of this is where these philosophies are
totally diametrical to other ways what
they're saying is
whatever you may do whether you get
angry or you lustful or hatred the
tremendous energy isn't it anger is
great energy isn't
it hatred is great energy lust is gre

For each type of index, there are specific retriever modes, each with a specific purpose.

## Response Synthesizers

With structured_answer_filtering set to True, our refine module is able to filter out any input nodes that are not relevant to the question being asked. This is particularly useful for RAG-based Q&A systems that involve retrieving chunks of text from an external vector store for a given user query.

In [75]:
from llama_index.schema import Node
from llama_index.response_synthesizers import ResponseMode, get_response_synthesizer

# Response synthesizer in COMPACT mode with structured answer filtering enabled.
response_synthesizer = get_response_synthesizer(
    response_mode=ResponseMode.COMPACT,
    structured_answer_filtering=True  # response_mode needs to be compact or refine
)
# Same as: response_synthesizer = get_response_synthesizer(response_mode="compact", structured_answer_filtering=True)

# response = response_synthesizer.synthesize(
#     "query text",
#     nodes = nodes
# )

In [72]:
response.response

"I'm sorry, but I cannot answer the query as there is no specific query text provided in the context information."

In [73]:
# Plug the custom response synthesizer into a query engine over `index`.
query_engine = index.as_query_engine(response_synthesizer=response_synthesizer)
# query_engine.query(ask something)

**Response Modes:**

    - refine
    - compact
    - tree_summarize
    - simple_summarize
    - no_text
    - accumulate
    - compact_accumulate

#### custom response synthesizers by inheriting from llama_index.response_synthesizers.base.BaseSynthesizer

#### Custom Prompt Templates (with additional variables)

In [76]:
from llama_index import PromptTemplate
from llama_index.response_synthesizers import TreeSummarize

# QA prompt with one extra variable, {tone_name}, in addition to {context_str}
# and {query_str}. Adjacent string literals are joined with no separator, so
# each piece must end with its own space or newline; the piece ending in
# "prior knowledge," used to lack one, which produced "knowledge,answer".
qa_prompt_tmpl = (
    "Context information is below.\n"
    "-----------------------------\n"
    "{context_str}\n"
    "-----------------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the query.\n"
    "Please also write the answer in the tone of {tone_name}.\n"
    "Query: {query_str}\n"
    "Answer: "
)

# Wrap the raw template string in a PromptTemplate.
qa_prompt = PromptTemplate(qa_prompt_tmpl)

# Tree-summarize synthesizer that uses the custom prompt as its summary template.
summarizer = TreeSummarize(verbose=True, summary_template=qa_prompt)

# response = summarizer.get_response("query", [text], tone_name="a shakespeare play")

In [None]:
# Reference call showing the main options of get_response_synthesizer.
# NOTE: service_context, text_qa_tmpl and refine_qa_tmpl are placeholders that
# this notebook never defines; create them before running this cell.
response_synthesizer = get_response_synthesizer(
    response_mode="compact",  # must be a valid response mode; the empty string is rejected
    service_context=service_context,
    text_qa_template=text_qa_tmpl,
    refine_template=refine_qa_tmpl,  # the keyword is `refine_template`, not `refine_qa_template`
    use_async=False,
    streaming=False
)

# response_synthesizer.synthesize() -> for synchronous
# await response_synthesizer.asynthesize() -> for async

#### Pydantic Tree Summarize

In [77]:
from typing import List
from llama_index.types import BaseModel


# Structured output schema for the summarizer below. The docstring and the
# field names are left exactly as written: presumably they are what describes
# the expected output to the LLM when passed as output_cls — TODO confirm.
class Biography(BaseModel):
    """Data model for a biography."""
    name: str
    best_known_for: List[str]
    extra_info: str

# Same custom-prompt tree summarizer as before, now with output_cls=Biography.
# Note: this rebinds `summarizer`, replacing the earlier instance.
summarizer = TreeSummarize(
    verbose=True,
    summary_template=qa_prompt,
    output_cls=Biography
)

In [None]:
# response = summarizer.get_response(
#     "who is Paul Graham?", [text], tone_name="a business memo"
# )

## Router

It is basically something that chooses an option from multiple choices. The chosen option is the route it will take.

It can be on top of a query engine or a retriever

In [22]:
from llama_index.query_engine.router_query_engine import RouterQueryEngine
from llama_index.retrievers.router_retriever import RouterRetriever
from llama_index.selectors.pydantic_selectors import PydanticSingleSelector
from llama_index.tools import QueryEngineTool, RetrieverTool

In [None]:
#do not run

# Illustration only (the cell is marked "do not run"): the two "query engines"
# below are placeholder strings, not real engine objects.
list_query_engine = "some query engine"
vector_query_engine = "some other query engine"

# Each tool carries a description; the descriptions are the options the
# router's selector chooses between.
list_tool = QueryEngineTool.from_defaults(
    query_engine=list_query_engine,
    description="Useful for summarizing questions related to data source."
)

vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description="Useful for retrieving specific context related to data source."
)

# Router on top of query engines, using a single-choice selector.
# Note: this rebinds `query_engine`.
query_engine = RouterQueryEngine(
        selector=PydanticSingleSelector.from_defaults(),
        query_engine_tools=[
            list_tool,
            vector_tool
        ]
)

# query_engine.query("something")

#### on top of a retriever

In [None]:
#do not run

# Illustration only (the cell is marked "do not run").
# NOTE(review): `vector_tool` and `list_tool` are the QueryEngineTool objects
# from the previous cell; elsewhere in this notebook `as_retriever()` is only
# called on indices — confirm which objects these retrievers should come from.
vector_retriever = vector_tool.as_retriever()
list_retriever = list_tool.as_retriever()

# Wrap each retriever as a tool with a description. The names `vector_tool`
# and `list_tool` are rebound here to RetrieverTool objects.
vector_tool = RetrieverTool.from_defaults(
    retriever=vector_retriever,
    description="Useful for retrieving specific context from Quantum Physics"
)

list_tool = RetrieverTool.from_defaults(
    retriever=list_retriever,
    description="Useful for retrieving specific context from CBC News"
)

# Routing: a single-choice selector on top of the retriever tools.
# Note: this rebinds `retriever`.
retriever = RouterRetriever(
    selector=PydanticSingleSelector.from_defaults(),
    retriever_tools=[vector_tool, list_tool]
)

#### Selectors can be used separately

In [24]:
from llama_index.selectors.llm_selectors import (
    LLMSingleSelector, #choose single from many choices
    LLMMultiSelector, #choose multiple from many choices
)

from llama_index.selectors.pydantic_selectors import (
    PydanticMultiSelector, #choose multiple from many choices
    PydanticSingleSelector, #choose single from many choices
)

#individual llms can be finetuned in single tool and multi tool selection for a given task.

In [25]:
# Choices can be given as ToolMetadata objects ...
choices = [
    ToolMetadata(description="description of tool1", name="tool1"),
    ToolMetadata(description="description of tool2", name="tool2")
]
# ... or as plain strings. This second assignment overwrites the first, so
# only the string form is in effect once the cell has run.
choices = [
    "tool 1 - description of tool1",
    "tool 2 - description of tool2"
]

# LLM-based selector that chooses a single option from many choices.
selector = LLMSingleSelector.from_defaults()
# result = selector.select(
#     choices,
#     query="select the good tool"
# )

## Node PostProcessors

- apply postprocessing after retrieving
- happens inside query engine before response synthesis
- accepts NodeWithScore objects only

In [26]:
from llama_index.postprocessor import (
    SimilarityPostprocessor,
    CohereRerank
)
from llama_index.schema import Node, NodeWithScore

In [27]:
# Hand-made scored nodes standing in for retrieval results.
nodes = [
    NodeWithScore(node=Node(text=text), score=score)
    for text, score in (
        ("something", 0.7),
        ("some other thing", 0.65),
        ("something other", 0.45),
        ("something else", 0.3),
    )
]

# Similarity post-processor: only nodes with a similarity score of 0.6 or more.
sim_processor = SimilarityPostprocessor(similarity_cutoff=0.6)
filtered = sim_processor.postprocess_nodes(nodes)

# Re-ranking nodes given a query string (left disabled; it needs an API key):
# reranker = CohereRerank(api_key="", top_n=2)
# filtered = reranker.postprocess_nodes(nodes)

# Post-processors can also be attached to a query engine.
query_engine = index.as_query_engine(
    node_postprocessors=[sim_processor],  # + [reranker]
)


Here are summaries of some node postprocessor modules from LlamaIndex and examples of practical situations where they can be used:

- SimilarityPostprocessor: This module removes nodes that are below a certain similarity score threshold. For instance, if you are gathering information from various sources and want to ensure that only the most relevant and closely related content is included, you can use this postprocessor to filter out less similar nodes.

- KeywordNodePostprocessor: It ensures specific keywords are either included or excluded in the nodes. This is useful in situations where you want to focus on certain topics or avoid irrelevant ones. For example, if you are analyzing text for specific themes or concepts, you can set this postprocessor to include nodes containing specific keywords and exclude others.

- MetadataReplacementPostProcessor: This postprocessor replaces the node content with a field from the node's metadata. It's most useful in combination with the SentenceWindowNodeParser. A practical use case could be when you're working with documents where certain metadata fields are more relevant than the main content, allowing you to swap the node content with this key information.

- LongContextReorder: This module re-orders retrieved nodes, which is beneficial when dealing with large amounts of data and where crucial data might be buried in the middle of long contexts. It helps in scenarios where you need to sift through extensive data and want the most relevant information to be more accessible.

- others are rerankers and simple to understand


We can also have custom node post processors

## Output Parsing Modules

In [43]:
from pydantic import BaseModel
from typing import List
# from guidance.llms import OpenAI
from llama_index.program import GuidancePydanticProgram, OpenAIPydanticProgram

In [39]:
#Define Output Schema

# One track of an album; used as the element type of Album.songs.
class Song(BaseModel):
    title: str
    length_sec: int  # presumably the track length in seconds (going by the field name)

# Top-level output schema passed as output_cls to the pydantic programs:
# an album name, its artist, and a list of Song entries.
class Album(BaseModel):
    name: str
    artist: str
    songs: List[Song]

#### OpenAI Pydantic program

In [52]:
# Prompt with a single variable, {movie_name}, supplied when the program is called.
prompt_tmpl_str = (
    "Generate an example album, with artist and a list of songs. "
    "Using the movie {movie_name} as inspiration"
)

# Program that produces one Album.
program = OpenAIPydanticProgram.from_defaults(
    prompt_template_str=prompt_tmpl_str,
    output_cls=Album,
    verbose=True,
)

# For parallel function calling: same schema and prompt, with an explicit
# model and allow_multiple=True.
program_parallel = OpenAIPydanticProgram.from_defaults(
    prompt_template_str=prompt_tmpl_str,
    output_cls=Album,
    llm=OpenAI(model="gpt-3.5-turbo-1106"),
    allow_multiple=True,
    verbose=True,
)

- can also do streaming..

#### Guidance pydantic program

In [42]:
# Same prompt as for the OpenAI program, but with double braces around
# movie_name — presumably the template syntax guidance expects; confirm.
# Note: this rebinds `prompt_tmpl_str`.
prompt_tmpl_str = (
    "Generate an example album, with artist and a list of songs. Using the movie {{movie_name}} as inspiration"
)

# NOTE(review): the only `OpenAI` import visible in this notebook is
# llama_index.llms.OpenAI (the `from guidance.llms import OpenAI` line is
# commented out) — confirm that guidance_llm accepts it, or restore the
# guidance import before running.
# Note: this rebinds `program`, replacing the OpenAI pydantic program.
program = GuidancePydanticProgram(
    output_cls=Album,
    prompt_template_str=prompt_tmpl_str,
    guidance_llm=OpenAI("text-davinci-003"),
    verbose=True
)

# output = program(movie_name="Outlander")

------