In [58]:
import os

# os.environ["OPENAI_API_KEY"] = "INSERT OPENAI KEY"
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [59]:
# https://github.com/run-llama/llama_index/blob/main/examples/paul_graham_essay/TestEssay.ipynb
# print(os.environ["OPENAI_API_KEY"])

In [60]:

from llama_index import TreeIndex, SimpleDirectoryReader
from IPython.display import Markdown, display

In [61]:
# 计数

import tiktoken
from llama_index.llms import Anthropic
from llama_index.callbacks import CallbackManager, TokenCountingHandler

from llama_index import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    ServiceContext,
    set_global_service_context,
)

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext

# 设置tokenizer和TokenCountingHandler
tokenizer = tiktoken.encoding_for_model("text-davinci-003").encode

token_counter = TokenCountingHandler(tokenizer=tokenizer)
callback_manager = CallbackManager([token_counter])

service_context = ServiceContext.from_defaults(callback_manager=callback_manager)
set_global_service_context(service_context)


In [62]:
documents = SimpleDirectoryReader("data").load_data()


In [63]:
new_index = TreeIndex.from_documents(documents)


INFO:llama_index.indices.common_tree.base:> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks


In [64]:
print(
    "Embedding Tokens: ",
    token_counter.total_embedding_token_count,
    "\n",
    "LLM Prompt Tokens: ",
    token_counter.prompt_llm_token_count,
    "\n",
    "LLM Completion Tokens: ",
    token_counter.completion_llm_token_count,
    "\n",
    "Total LLM Token Count: ",
    token_counter.total_llm_token_count,
    "\n",
)

Embedding Tokens:  0 
 LLM Prompt Tokens:  7004 
 LLM Completion Tokens:  367 
 Total LLM Token Count:  7371 



In [65]:
# set Logging to DEBUG for more detailed outputs
query_engine = new_index.as_query_engine()
response = query_engine.query("What did the author do growing up?")

INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 0] Selected node: [1]/[1]
>[Level 0] Selected node: [1]/[1]
>[Level 0] Selected node: [1]/[1]
>[Level 0] Selected node: [1]/[1]
>[Level 0] Selected node: [1]/[1]
>[Level 0] Selected node: [1]/[1]
INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 1] Selected node: [1]/[1]
>[Level 1] Selected node: [1]/[1]
>[Level 1] Selected node: [1]/[1]
>[Level 1] Selected node: [1]/[1]
>[Level 1] Selected node: [1]/[1]
>[Level 1] Selected node: [1]/[1]


In [66]:
display(Markdown(f"<b>{response}</b>"))


<b>The author worked on writing and programming outside of school before college. They wrote short stories and tried programming on an IBM 1401 computer using an early version of Fortran. They also mentioned getting a microcomputer, a TRS-80, and started programming more extensively, including writing simple games and a word processor.</b>

In [67]:
# set Logging to DEBUG for more detailed outputs
response = query_engine.query("What did the author do after his time at Y Combinator?")

INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 0] Selected node: [2]/[2]
>[Level 0] Selected node: [2]/[2]
>[Level 0] Selected node: [2]/[2]
>[Level 0] Selected node: [2]/[2]
>[Level 0] Selected node: [2]/[2]
>[Level 0] Selected node: [2]/[2]
INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 1] Selected node: [5]/[5]
>[Level 1] Selected node: [5]/[5]
>[Level 1] Selected node: [5]/[5]
>[Level 1] Selected node: [5]/[5]
>[Level 1] Selected node: [5]/[5]
>[Level 1] Selected node: [5]/[5]


In [68]:
display(Markdown(f"<b>{response}</b>"))


<b>After the author's time at Y Combinator, they decided to pursue painting. They wanted to see how good they could get if they focused on it. They spent most of the rest of 2014 painting, but eventually ran out of steam and stopped working on it. They then started writing essays again and later began working on Lisp.</b>

In [69]:
print(
    "Embedding Tokens: ",
    token_counter.total_embedding_token_count,
    "\n",
    "LLM Prompt Tokens: ",
    token_counter.prompt_llm_token_count,
    "\n",
    "LLM Completion Tokens: ",
    token_counter.completion_llm_token_count,
    "\n",
    "Total LLM Token Count: ",
    token_counter.total_llm_token_count,
    "\n",
)

Embedding Tokens:  0 
 LLM Prompt Tokens:  17246 
 LLM Completion Tokens:  779 
 Total LLM Token Count:  18025 



# Build Tree Index with a custom Summary Prompt, directly retrieve answer from root node

In [70]:
from llama_index.prompts import PromptTemplate


In [71]:
documents = SimpleDirectoryReader("data").load_data()

query_str = "What did the author do growing up?"
SUMMARY_PROMPT_TMPL = (
    "Context information is below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given the context information and not prior knowledge, "
    f"answer the question: {query_str}\n"
)
SUMMARY_PROMPT = PromptTemplate(SUMMARY_PROMPT_TMPL)
index_with_query = TreeIndex.from_documents(documents, summary_template=SUMMARY_PROMPT)

INFO:llama_index.indices.common_tree.base:> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks
> Building index from nodes: 1 chunks


In [72]:
# directly retrieve response from root nodes instead of traversing tree
query_engine = index_with_query.as_query_engine(retriever_mode="root")
response = query_engine.query(query_str)

INFO:llama_index.indices.tree.tree_root_retriever:> Starting query: What did the author do growing up?
> Starting query: What did the author do growing up?
> Starting query: What did the author do growing up?
> Starting query: What did the author do growing up?
> Starting query: What did the author do growing up?
> Starting query: What did the author do growing up?


In [73]:
display(Markdown(f"<b>{response}</b>"))


<b>The author engaged in activities such as writing short stories and programming, including working on an IBM 1401 computer in 9th grade and teaching themselves Lisp. They also worked on reverse-engineering a program called SHRDLU for their undergraduate thesis.</b>

# Using GPT Keyword Table Index

In [74]:
from llama_index import KeywordTableIndex, SimpleDirectoryReader
from IPython.display import Markdown, display

In [75]:
# build keyword index
documents = SimpleDirectoryReader("data").load_data()
index = KeywordTableIndex.from_documents(documents)

In [76]:
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do after his time at Y Combinator?")

INFO:llama_index.indices.keyword_table.retrievers:> Starting query: What did the author do after his time at Y Combinator?
> Starting query: What did the author do after his time at Y Combinator?
> Starting query: What did the author do after his time at Y Combinator?
> Starting query: What did the author do after his time at Y Combinator?
> Starting query: What did the author do after his time at Y Combinator?
> Starting query: What did the author do after his time at Y Combinator?
INFO:llama_index.indices.keyword_table.retrievers:query keywords: ['y combinator', 'author', 'combinator', 'time']
query keywords: ['y combinator', 'author', 'combinator', 'time']
query keywords: ['y combinator', 'author', 'combinator', 'time']
query keywords: ['y combinator', 'author', 'combinator', 'time']
query keywords: ['y combinator', 'author', 'combinator', 'time']
query keywords: ['y combinator', 'author', 'combinator', 'time']
INFO:llama_index.indices.keyword_table.retrievers:> Extracted keywords: 

In [77]:
display(Markdown(f"<b>{response}</b>"))


<b>The context does not provide any information about what the author did after his time at Y Combinator.</b>

# Using GPT List Index

In [78]:
from llama_index import SummaryIndex, SimpleDirectoryReader
from IPython.display import Markdown, display

In [79]:

# build summary index
documents = SimpleDirectoryReader("data").load_data()
index = SummaryIndex.from_documents(documents)

In [80]:
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do after his time at Y Combinator?")

In [81]:
display(Markdown(f"<b>{response}</b>"))


<b>The context information does not provide any information about the author's time at Y Combinator or what they did after it.</b>

In [82]:
print(
    "Embedding Tokens: ",
    token_counter.total_embedding_token_count,
    "\n",
    "LLM Prompt Tokens: ",
    token_counter.prompt_llm_token_count,
    "\n",
    "LLM Completion Tokens: ",
    token_counter.completion_llm_token_count,
    "\n",
    "Total LLM Token Count: ",
    token_counter.total_llm_token_count,
    "\n",
)

Embedding Tokens:  0 
 LLM Prompt Tokens:  73300 
 LLM Completion Tokens:  3629 
 Total LLM Token Count:  76929 

