In [None]:
from llama_index.llms import HuggingFaceLLM
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index import (
    StorageContext, 
    load_index_from_storage,
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext)

In [None]:
# Hugging Face checkpoint identifier used throughout this notebook.
# Model card: https://huggingface.co/Writer/palmyra-small
DEFAULT_MODEL_NAME: str = "Writer/palmyra-small"

### Create a new model and use it to create embeddings based on the files in the `data/` directory

In [None]:
# Initialize the local Hugging Face model; the same checkpoint name is used
# for both the model weights and the tokenizer.
llm = HuggingFaceLLM(
    model_name=DEFAULT_MODEL_NAME,
    tokenizer_name=DEFAULT_MODEL_NAME)

# Create the service context that bundles the LLM with a locally run
# embedding model (embed_model="local").
service_context = ServiceContext.from_defaults(
    llm=llm, embed_model="local")

# Read all docs from the relative directory "data"
documents = SimpleDirectoryReader("data").load_data()

# Create the vector index from the documents. The LLM is already carried by
# service_context, so it is not passed again as a separate keyword argument.
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context)

# Store the index locally in the relative directory "index", then delete the
# in-memory index for demonstration purposes (it is reloaded from disk below).
index.storage_context.persist(persist_dir="index")
del index

### Load the index from the `index/` directory into memory

In [None]:
# Rebuild the storage context from what was persisted under the relative
# directory "index", then restore the index using the existing service context.
storage_context = StorageContext.from_defaults(
    persist_dir="index",
)
index = load_index_from_storage(
    storage_context=storage_context,
    service_context=service_context,
)

# Wrap the restored index in a vector retriever.
# NOTE(review): service_context may be redundant here if the retriever already
# takes it from the index — confirm against the installed llama_index version.
retriever = VectorIndexRetriever(index=index, service_context=service_context)

# Assemble the query engine that answers questions on top of the retriever.
query_engine = RetrieverQueryEngine.from_args(
    retriever,
    service_context=service_context,
)

### Answer question based on embedded data

In [None]:
# Question to ask about the indexed documents
user_query = "Who is Sir Reginald?"

# Run the question through the query engine and print the answer;
# print() is used so the response is shown via its string form.
response = query_engine.query(user_query)
print(response)