# Topics Covered

1.   VectorStoreIndex   
2.   Storage Context
3.   Service Context
4.   List Index
5.   Keyword Table Index

In [None]:
!pip install llama-index==0.7.0

Collecting llama-index==0.7.0
  Downloading llama_index-0.7.0-py3-none-any.whl (566 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m566.2/566.2 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken (from llama-index==0.7.0)
  Downloading tiktoken-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dataclasses-json (from llama-index==0.7.0)
  Downloading dataclasses_json-0.5.14-py3-none-any.whl (26 kB)
Collecting langchain>=0.0.218 (from llama-index==0.7.0)
  Downloading langchain-0.0.273-py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
Collecting openai>=0.26.4 (from llama-index==0.7.0)
  Downloading openai-0.27.9-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [3

# VectorStoreIndex

In [None]:
import os
from getpass import getpass

from llama_index import VectorStoreIndex, SimpleDirectoryReader
from IPython.display import display, HTML
import openai

# Do not hardcode credentials in the notebook: take the key from the
# OPENAI_API_KEY environment variable, and fall back to a non-echoing
# interactive prompt when the variable is unset or empty.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# Load the documents found in the "data" directory and build a vector index over them.
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)

# Query the index, asking the engine to use the 3 most similar nodes.
query_engine = index.as_query_engine(similarity_top_k=3)
response = query_engine.query("What did the author do growing up?")

# Render the synthesized response text as HTML in a larger font.
display(HTML(f'<p style="font-size:20px">{response.response}</p>'))

# Storage Context

In [None]:
from llama_index import StorageContext, load_index_from_storage
from llama_index.storage.docstore import SimpleDocumentStore
from llama_index.storage.index_store import SimpleIndexStore
from llama_index.vector_stores import SimpleVectorStore
from llama_index.node_parser import SimpleNodeParser

# Split the previously loaded documents into nodes.
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(documents)

# Assemble the storage context explicitly from the three "simple" stores.
default_stores = dict(
    docstore=SimpleDocumentStore(),
    vector_store=SimpleVectorStore(),
    index_store=SimpleIndexStore(),
)
storage_context = StorageContext.from_defaults(**default_stores)

# Build the index directly from the nodes, backed by the storage context above.
index = VectorStoreIndex(nodes, storage_context=storage_context)

# Persist the index's storage context to the "storage" directory.
index.storage_context.persist(persist_dir="storage")

In [None]:
# To load the index later, first set up a storage context that points at the
# persist directory; this loads the persisted stores from "storage".
storage_context = StorageContext.from_defaults(persist_dir="storage")

# Then load the index object from that storage context.
# NOTE(review): when the persist dir holds multiple indexes, presumably an
# index id must be supplied to select one — confirm against the library docs.
loaded_index = load_index_from_storage(storage_context)

# Query the reloaded index exactly as the freshly built one was queried.
query_engine = loaded_index.as_query_engine(similarity_top_k=3)
response = query_engine.query("What did the author do growing up?")

# Render the synthesized response text as HTML in a larger font.
answer_html = f'<p style="font-size:20px">{response.response}</p>'
display(HTML(answer_html))

# Service Context

In [None]:
from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding, PromptHelper
from llama_index.llms import OpenAI
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
from llama_index.node_parser import SimpleNodeParser

# Individual components that the service context bundles together.
llm = OpenAI(model='gpt-4', temperature=0, max_tokens=256)
embed_model = OpenAIEmbedding()
node_parser = SimpleNodeParser.from_defaults(chunk_size=40, chunk_overlap=0)

# NOTE(review): num_output (512) differs from the LLM's max_tokens (256) set
# above — confirm that this mismatch is intentional.
prompt_helper = PromptHelper(context_window=4096, num_output=512, chunk_overlap_ratio=0.1)

# Bundle the LLM, embedding model, node parser and prompt helper.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    node_parser=node_parser,
    prompt_helper=prompt_helper,
)

# Build a vector index over the documents using the customised service context.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

query_engine = index.as_query_engine(similarity_top_k=3)
response = query_engine.query("What did the author do growing up?")

# Render the synthesized response text as HTML in a larger font.
answer_html = f'<p style="font-size:20px">{response.response}</p>'
display(HTML(answer_html))

# List Index

In [None]:
from llama_index import ListIndex

# Build a list index over the loaded documents.
index = ListIndex.from_documents(documents)

# Ask for a summary, using the 'tree_summarize' response mode.
summary_question = "What is the summary of the document?"
query_engine = index.as_query_engine(response_mode='tree_summarize')
response = query_engine.query(summary_question)

# Render the synthesized response text as HTML in a larger font.
answer_html = f'<p style="font-size:20px">{response.response}</p>'
display(HTML(answer_html))

In [None]:
# similarity_top_k is a parameter of the embedding-based list retriever. With
# the default list retriever it has no effect, so this cell would otherwise
# repeat the previous query unchanged. Selecting retriever_mode='embedding'
# makes the engine summarize only the 2 most similar nodes.
query_engine = index.as_query_engine(
    retriever_mode='embedding',
    similarity_top_k=2,
    response_mode='tree_summarize',
)
response = query_engine.query("What is the summary of the document?")

# Render the synthesized response text as HTML in a larger font.
display(HTML(f'<p style="font-size:20px">{response.response}</p>'))

# Keyword Table Index

In [None]:
from llama_index import KeywordTableIndex

# Build a keyword table index over the loaded documents.
index = KeywordTableIndex.from_documents(documents)

# Query the index with the default query engine settings.
graduation_question = "did the author complete his graduation from Stanford?"
query_engine = index.as_query_engine()
response = query_engine.query(graduation_question)

# Render the synthesized response text as HTML in a larger font.
answer_html = f'<p style="font-size:20px">{response.response}</p>'
display(HTML(answer_html))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
