In [21]:
from dotenv import load_dotenv
import os
load_dotenv()
api_key = os.environ["OPENAI_API_KEY"]

In [22]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

Settings.llm = OpenAI(model="gpt-4o-mini",temperature=0.8)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

In [23]:
from llama_index.core import SimpleDirectoryReader

# load documents
documents = SimpleDirectoryReader("data").load_data()


In [24]:
from llama_index.core import Settings
# initialize settings(set chunk size)
Settings.chunk_size = 2000
Settings.chunk_overlap = 100
nodes = Settings.node_parser.get_nodes_from_documents(documents)

In [25]:
from llama_index.core import StorageContext

# initialize storage context (by default it's in-memory)
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

In [26]:
from llama_index.core import SummaryIndex
from llama_index.core import VectorStoreIndex

summary_index = SummaryIndex(nodes, storage_context=storage_context)
vector_index = VectorStoreIndex(nodes, storage_context=storage_context)

In [27]:
list_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
vector_query_engine = vector_index.as_query_engine()

In [28]:
from llama_index.core.tools import QueryEngineTool


list_tool = QueryEngineTool.from_defaults(
    query_engine=list_query_engine,
    description=(
        "Useful for summarization questions related to Paul Graham eassy on"
        " What I Worked On."
    ),
)

vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for retrieving specific context from Paul Graham essay on What"
        " I Worked On."
    ),
)

In [48]:
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
)


query_engine = RouterQueryEngine(
    selector=PydanticSingleSelector.from_defaults(),
    query_engine_tools=[
        list_tool,
        vector_tool,
    ],
)

In [30]:
response = query_engine.query("What is the summary of the document?")
print(str(response))

The document provides comprehensive information on advanced Python programming concepts, including Object-Oriented Programming (OOP), features of OOP such as encapsulation, abstraction, polymorphism, and inheritance, along with practical examples. It covers key topics like exception handling, file handling, regular expressions, Python Database Connectivity (PDBC), and various advanced features like decorators, generator functions, and logging.

Additionally, it discusses multi-threading, synchronization mechanisms, and working with data using libraries such as Pandas and Matplotlib. The document also includes an overview of form 10-K for annual reports required by the SEC, detailing instructions for filing, disclosure on business risks, properties, legal proceedings, executive compensation, and more. Overall, it serves as a thorough guide for Python programming and regulatory reporting.


In [31]:
response = query_engine.query("What is class?")
print(str(response))

A class is a blueprint for creating objects in programming. It defines the properties and behaviors that the objects created from the class will have. In the provided examples, classes such as `Student`, `Car`, and `Outer` serve as templates from which instances (or objects) can be created. The syntax for creating a class is typically structured as `Class Classname`, and objects of the class can be instantiated using the syntax `Objectreferencevariable = Classname()`.


In [32]:
query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        list_tool,
        vector_tool,
    ],
)

In [49]:
response = query_engine.query("what is the Business?")
print(str(response))

The Business section requires a brief description of the business conducted by the registrant and its subsidiaries during the most recent fiscal year. This description should indicate the general nature and scope of the business according to the management's opinion.


In [38]:
# [optional] look at selected results
print(str(response.metadata["selector_result"]))

selections=[SingleSelection(index=0, reason="The question 'what is this Business?' relates to understanding the content and themes discussed in Paul Graham's essay, making summarization relevant.")]


In [39]:
from llama_index.core import SimpleKeywordTableIndex

keyword_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context)
keyword_tool = QueryEngineTool.from_defaults(
    # [optional] look at selected results
print(str(response.metadata["selector_result"]))
)

selections=[SingleSelection(index=0, reason="The question 'what is this Business?' relates to understanding the content and themes discussed in Paul Graham's essay, making summarization relevant.")]


In [40]:
query_engine = RouterQueryEngine(
    selector=PydanticMultiSelector.from_defaults(),
    query_engine_tools=[
        list_tool,
        vector_tool,
        keyword_tool
    ],
)

In [45]:
# [optional] look at selected results
print(str(response.metadata["selector_result"]))

selections=[SingleSelection(index=0, reason="The question 'what is this Business?' relates to understanding the content and themes discussed in Paul Graham's essay, making summarization relevant.")]


In [43]:
print(dir(query_engine))


['__abstractmethods__', '__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_aquery', '_as_query_component', '_get_prompt_modules', '_get_prompts', '_llm', '_metadatas', '_query', '_query_engines', '_selector', '_summarizer', '_update_prompts', '_validate_prompts', '_verbose', 'aquery', 'as_query_component', 'asynthesize', 'callback_manager', 'from_defaults', 'get_prompts', 'query', 'retrieve', 'synthesize', 'update_prompts']
