In [None]:
#<imports>
#for access token
import os
from getpass import getpass

#for llamaindex
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core import ServiceContext, StorageContext,load_index_from_storage, Settings
from llama_index.core.node_parser import SimpleNodeParser, MarkdownNodeParser, SentenceSplitter
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.llms.huggingface import HuggingFaceInferenceAPI

#getting responses
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever

#<\imports>

In [None]:
#<get access token>
# Reuse a token already exported as HUGGINGFACEHUB_API_TOKEN so the notebook can
# run unattended (Restart & Run All, nbconvert); only prompt when it is missing
# or empty. getpass() reads the value without echoing it to the notebook output.
HF_TOKEN = os.environ.get('HUGGINGFACEHUB_API_TOKEN') or getpass("Hugging Face API token: ")
# Mirror the token into the process environment under the same variable name.
os.environ['HUGGINGFACEHUB_API_TOKEN'] = HF_TOKEN
#<\get access token>

In [None]:
# <settings>
# PERSIST_DIR - directory on disk where the indexed data and metadata are stored,
#               so the data does not have to be re-indexed on every run.
# MODEL_NAME  - LLM that takes the query plus the retrieved documents and generates a response.
# EMBED_MODEL - model that produces the vector embeddings kept in the vector store.
PERSIST_DIR = "./storage"
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
EMBED_MODEL = "thenlper/gte-large"

# Register the LLM globally on Settings.
Settings.llm = HuggingFaceInferenceAPI(token=HF_TOKEN, model_name=MODEL_NAME)

# Build the LangChain embedding client first, then wrap it for LlamaIndex and
# register it globally on Settings.
hf_embeddings = HuggingFaceInferenceAPIEmbeddings(model_name=EMBED_MODEL, api_key=HF_TOKEN)
Settings.embed_model = LangchainEmbedding(hf_embeddings)
# <\settings>

In [None]:
#<add data to persist directory>
# Two paths, chosen by whether PERSIST_DIR already exists on disk:
#   * it exists     -> load the previously persisted index from it;
#   * it is missing -> read the files under "data", split them into nodes,
#                      build a vector index and persist it to PERSIST_DIR.
if os.path.exists(PERSIST_DIR):
    # Cached path: rebuild the storage context from disk and load the index.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # Fresh path: load documents and parse them into nodes.
    documents = SimpleDirectoryReader("data").load_data()
    parser = SimpleNodeParser()
    nodes = parser.get_nodes_from_documents(documents)

    # Default storage context (no persist_dir given) backs the new index.
    storage_context = StorageContext.from_defaults()
    index = VectorStoreIndex(nodes, storage_context=storage_context)
    # Write the index's storage to disk so the next run takes the cached path.
    index.storage_context.persist(persist_dir=PERSIST_DIR)
#<\add data to persist directory>

In [None]:
#<defining query engine>
# Create a query engine from the index built/loaded in the previous cell,
# using as_query_engine()'s default arguments (none are overridden here).
query_engine = index.as_query_engine()
#<\defining query engine>

In [None]:
#<make a query>
# Keep the response in a variable and print it: printing shows the answer text
# (as the vector_query_engine cell below does), whereas leaving the call as a
# bare last expression makes Jupyter display the response object's repr.
story_summary = query_engine.query("summarize the story")
print(story_summary)
#<\make a query>

In [None]:
#<generating responses>
# Assemble a query engine by hand from its two parts instead of using
# index.as_query_engine():
#   1. a response synthesizer created with default arguments;
#   2. a retriever over the index that returns the top 2 most similar nodes.
response_synthesizer = get_response_synthesizer()
vector_retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=2,
)
vector_query_engine = RetrieverQueryEngine(
    retriever=vector_retriever,
    response_synthesizer=response_synthesizer,
)
#<\generating responses>

In [None]:
#<getting a response>
# Run the query through the hand-assembled engine, then print the result.
vector_response = vector_query_engine.query("summarize the text")
print(vector_response)
#<\getting a response>