In [1]:
import logging
import sys
import os

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [2]:
import os
os.environ['NUMEXPR_MAX_THREADS'] = '4'
os.environ['NUMEXPR_NUM_THREADS'] = '2'
import numexpr as ne

In [3]:
import os
os.environ["OPENAI_API_KEY"] = "<Your API Key"
import openai
openai.api_key = "<Your API Key"

In [4]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader
andrew_gina_docs = SimpleDirectoryReader(input_files=["./assets/AndrewHuberman/sleep/115_Dr_Gina_Poe_Use_Sleep_to_Enhance_Learning_Memory_&_Emotional_State_Huberman_Lab_Podcast.txt"], filename_as_id=True).load_data()

#creates a VectorStoreIndex using the documents loaded from the file.
vector_index = VectorStoreIndex.from_documents(andrew_gina_docs)

# converts the vector_index into a chat engine (vector_engine) to perform vector-based queries.
vector_engine = vector_index.as_chat_engine()

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [8]:
from llama_index import ListIndex, ServiceContext

service_context = ServiceContext.from_defaults(chunk_size=1024)
nodes = service_context.node_parser.get_nodes_from_documents(andrew_gina_docs)

# uses the ServiceContext to extract nodes from the documents and create a ListIndex from those nodes.
list_index = ListIndex(nodes)

In [9]:
#  converts the list_index into a query engine (list_query_engine) with specific response settings.

list_query_engine = list_index.as_query_engine(
    response_mode = "tree_summarize", use_async = True
)

In [10]:
from llama_index.tools.query_engine import QueryEngineTool


# creates a query engine tool (list_tool) for the list query engine with a specific description.
list_tool = QueryEngineTool.from_defaults( # For list query engine
    query_engine = list_query_engine,
    description="Useful for summarisation of the podcast about sleep and memory with dr. Gina Poe"
)

# creates a query engine tool (vector_tool) for the vector query engine with a specific description.
vector_tool = QueryEngineTool.from_defaults( # For vector query engine
    query_engine=vector_engine,
    description="Useful fro retrieving of specific content about sleep and memory in the podcast topic"
)

In [11]:
from llama_index.selectors.pydantic_selectors import PydanticSingleSelector
from llama_index.query_engine.router_query_engine import RouterQueryEngine

query_engine = RouterQueryEngine(
    selector=PydanticSingleSelector.from_defaults(),
    query_engine_tools=[
        list_tool,
        vector_tool
    ]
)

In [12]:
from llama_index.selectors.llm_selectors import LLMSingleSelector

query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        list_tool,
        vector_tool
    ] 
)

In [13]:
openai.log = "none"

In [16]:
from llama_index.response.pprint_utils import pprint_response
response = query_engine.query("Give me a quick summary of the Andrew Huberman podcast with dr. Gina Poe.")
pprint_response(response, show_source=True)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:llama_index.query_engine.router_query_engine:Selecting query engine 0: The first choice is relevant because it mentions summarization of the podcast about sleep and memory with dr. Gina Poe..
Selecting query engine 0: The first choice is relevant because it mentions summarization of the podcast about sleep and memory with dr. Gina Poe..
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/co

In [15]:
import nest_asyncio
nest_asyncio.apply()

Let's discuss each of these concepts:

1. **ListIndex:**
   - `ListIndex` is a data structure that represents a list of items. In the context of the provided code, it seems to be used to index and organize nodes obtained from a document. The nodes likely represent some structural or semantic elements within the document.

2. **tree_summarize:**
   - `tree_summarize` is a response mode for the list query engine. It suggests that the query engine is expected to provide a summarized response in a tree-like structure. The specifics of what this entails would depend on the implementation details of the query engine, but it generally implies a hierarchical and structured summary.

3. **RouterQueryEngine:**
   - `RouterQueryEngine` is a query engine that routes queries to different sub-engines based on certain criteria. In the provided code, two instances of `RouterQueryEngine` are created, each with a different selector (`PydanticSingleSelector` and `LLMSingleSelector`) and the same query engine tools (`list_tool` and `vector_tool`). This means that the router can direct queries to different query engines based on the type of selector used.

   - The `RouterQueryEngine` is configured to use a selector (which likely determines the type or format of the query) and a set of query engine tools (such as `list_tool` and `vector_tool) that define how to process and respond to queries.

In summary:
- `ListIndex` is a data structure for organizing and indexing lists of items.
- `tree_summarize` is a response mode that implies a structured, tree-like summary in the context of a list query engine.
- `RouterQueryEngine` is a query engine that directs queries to different sub-engines based on a selector. It's a way to handle different types of queries and route them to specialized processing engines.