In [1]:
import os

# os.environ["OPENAI_API_KEY"] = "INSERT OPENAI KEY"
import logging
import sys

# Route INFO-level log records to stdout so they appear in the cell output.
# basicConfig() already attaches one stdout StreamHandler to the root logger;
# adding a second StreamHandler on top of it made every record print twice
# (once formatted, once bare) and stacked one more handler each time this
# cell was re-run.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [24]:
# https://github.com/run-llama/llama_index/blob/main/examples/paul_graham_essay/TestEssay.ipynb

In [2]:

from llama_index import TreeIndex, SimpleDirectoryReader
from IPython.display import Markdown, display

In [3]:
# Load the source documents from the local "data" path; the result is the
# input for the index built in the next cell.
documents = SimpleDirectoryReader("data").load_data()


In [4]:
# Build a tree index over the loaded documents (progress is logged at INFO).
new_index = TreeIndex.from_documents(documents)


INFO:llama_index.indices.common_tree.base:> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks


In [5]:
# set Logging to DEBUG for more detailed outputs
# Create a query engine from the tree index (no extra arguments) and ask the
# first question. `query_engine` is reused by the follow-up query further down.
query_engine = new_index.as_query_engine()
response = query_engine.query("What did the author do growing up?")

INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 0] Selected node: [1]/[1]
>[Level 0] Selected node: [1]/[1]
INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 1] Selected node: [1]/[1]
>[Level 1] Selected node: [1]/[1]


In [6]:
# Render the latest `response` as Markdown, wrapped in <b> tags so it shows bold.
display(Markdown(f"<b>{response}</b>"))


<b>The author wrote short stories and also worked on programming, specifically on an IBM 1401 computer in 9th grade.</b>

In [7]:
# set Logging to DEBUG for more detailed outputs
# Second question against the same tree-index query engine created earlier;
# this rebinds `response`.
response = query_engine.query("What did the author do after his time at Y Combinator?")

INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 0] Selected node: [2]/[2]
>[Level 0] Selected node: [2]/[2]
INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 1] Selected node: [5]/[5]
>[Level 1] Selected node: [5]/[5]


In [10]:
# Render the latest `response` as Markdown, wrapped in <b> tags so it shows bold.
display(Markdown(f"<b>{response}</b>"))


<b>After the author's time at Y Combinator, they decided to pursue painting. They wanted to see how good they could get if they focused on it. They spent most of the rest of 2014 painting, but eventually ran out of steam and stopped working on it. They then started writing essays again and also began working on Lisp.</b>

# Build Tree Index with a custom Summary Prompt, directly retrieve answer from root node

In [11]:
from llama_index.prompts import PromptTemplate


In [13]:
# Reload the source documents for this section (rebinds `documents`).
documents = SimpleDirectoryReader("data").load_data()

# The question is interpolated into the template text right here, when the
# string is built; "{context_str}" sits in a plain (non-f) literal and is
# therefore left as a placeholder in the text handed to PromptTemplate.
query_str = "What did the author do growing up?"
SUMMARY_PROMPT_TMPL = "".join(
    [
        "Context information is below. \n",
        "---------------------\n",
        "{context_str}",
        "\n---------------------\n",
        "Given the context information and not prior knowledge, ",
        f"answer the question: {query_str}\n",
    ]
)
SUMMARY_PROMPT = PromptTemplate(SUMMARY_PROMPT_TMPL)

# Build a second tree index that uses the question-specific summary prompt.
index_with_query = TreeIndex.from_documents(
    documents,
    summary_template=SUMMARY_PROMPT,
)

INFO:llama_index.indices.common_tree.base:> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks


In [16]:
# directly retrieve response from root nodes instead of traversing tree
# `query_str` is the same question that was embedded in the summary prompt
# when `index_with_query` was built. This rebinds `query_engine` and `response`.
query_engine = index_with_query.as_query_engine(retriever_mode="root")
response = query_engine.query(query_str)

INFO:llama_index.indices.tree.tree_root_retriever:> Starting query: What did the author do growing up?
> Starting query: What did the author do growing up?


In [17]:
# Render the latest `response` as Markdown, wrapped in <b> tags so it shows bold.
display(Markdown(f"<b>{response}</b>"))


<b>The author engaged in activities such as writing short stories and programming, including working on an IBM 1401 computer in 9th grade and teaching themselves Lisp. They also worked on reverse-engineering a program called SHRDLU for their undergraduate thesis.</b>

# Using Keyword Table Index

In [18]:
from llama_index import KeywordTableIndex, SimpleDirectoryReader
from IPython.display import Markdown, display

In [19]:
# build keyword index
# Reloads the documents (rebinding `documents`) and builds a keyword-table
# index over them, bound to `index`.
documents = SimpleDirectoryReader("data").load_data()
index = KeywordTableIndex.from_documents(documents)

In [20]:
# set Logging to DEBUG for more detailed outputs
# Rebinds `query_engine` to an engine over the keyword-table index (replacing
# the tree-index engine) and asks the Y Combinator question again.
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do after his time at Y Combinator?")

INFO:llama_index.indices.keyword_table.retrievers:> Starting query: What did the author do after his time at Y Combinator?
> Starting query: What did the author do after his time at Y Combinator?
INFO:llama_index.indices.keyword_table.retrievers:query keywords: ['time', 'y combinator', 'author', 'combinator']
query keywords: ['time', 'y combinator', 'author', 'combinator']
INFO:llama_index.indices.keyword_table.retrievers:> Extracted keywords: ['time', 'y combinator', 'combinator']
> Extracted keywords: ['time', 'y combinator', 'combinator']


In [21]:
# Render the latest `response` as Markdown, wrapped in <b> tags so it shows bold.
display(Markdown(f"<b>{response}</b>"))


<b>The author's next step after his time at Y Combinator was to pursue his own investment firm. He personally financed the firm and brought on board his partner, Jessica, to join the team.</b>

# Using Summary Index (formerly GPT List Index)

In [22]:
from llama_index import SummaryIndex, SimpleDirectoryReader
from IPython.display import Markdown, display

In [None]:

# build summary index
# Reloads the documents and rebinds `index`, replacing the keyword-table index
# built in the previous section with a SummaryIndex.
documents = SimpleDirectoryReader("data").load_data()
index = SummaryIndex.from_documents(documents)

In [None]:
# set Logging to DEBUG for more detailed outputs
# Rebinds `query_engine` to an engine over the summary index and repeats the
# Y Combinator question.
# NOTE(review): this cell and the index-building cell before it carry no
# execution count, and the answer displayed after this cell is word-for-word
# the keyword-table answer — it looks stale; re-run both cells to confirm.
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do after his time at Y Combinator?")

In [23]:
# Render the latest `response` as Markdown, wrapped in <b> tags so it shows bold.
display(Markdown(f"<b>{response}</b>"))


<b>The author's next step after his time at Y Combinator was to pursue his own investment firm. He personally financed the firm and brought on board his partner, Jessica, to join the team.</b>