In [20]:
import openai
from llama_index.llms import OpenAI
# LLM that is later handed to the index's ServiceContext; the low temperature
# (0.1) is chosen to reduce sampling randomness in generated answers.
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo")

In [3]:
import llama_index
from llama_index.vector_stores import ElasticsearchStore

In [None]:
# Load the source documents: point a SimpleDirectoryReader at ../data/
# (a path relative to the notebook's working directory) and read its contents.
from llama_index import VectorStoreIndex, SimpleDirectoryReader, Document
reader = SimpleDirectoryReader(input_dir="../data/")
# `docs` is iterated later and each item's `.text` is joined into one Document.
docs = reader.load_data()

In [19]:
# Merge the text of every loaded document into a single Document, with a blank
# line ("\n\n") between the pieces.
document = Document(text="\n\n".join(loaded.text for loaded in docs))

## Setup Vector Store

In [10]:
# Vector store backed by the Elasticsearch instance at localhost:9200, using
# the "test-llamaidx" index name.
es_store = ElasticsearchStore(
    es_url="http://localhost:9200",
    index_name="test-llamaidx",
)

## Sentence Window Retrieval

https://docs.llamaindex.ai/en/stable/examples/node_postprocessor/MetadataReplacementDemo.html

In [11]:
from llama_index import ServiceContext, VectorStoreIndex, StorageContext
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.indices.postprocessor import SentenceTransformerRerank


In [12]:
def build_sentence_window_index(
    document,
    llm,
    vector_store,
    embed_model="local:BAAI/bge-small-en-v1.5",
    window_size=3,
):
    """Build a sentence-window ``VectorStoreIndex`` from a single document.

    The document is parsed with ``SentenceWindowNodeParser``, configured to
    store each node's window under the ``"window"`` metadata key and the
    original sentence under ``"original_text"``.

    Args:
        document: The document to index. It is wrapped in a one-element list
            before being passed to ``VectorStoreIndex.from_documents``.
        llm: LLM forwarded to ``ServiceContext.from_defaults``.
        vector_store: Vector store forwarded to
            ``StorageContext.from_defaults``.
        embed_model: Embedding model spec forwarded to
            ``ServiceContext.from_defaults``.
        window_size: Forwarded as ``window_size`` to the node parser. Defaults
            to 3, the value that used to be hard-coded here. (Per the
            llama_index docs this is the number of sentences captured on each
            side of a sentence -- confirm against the installed version.)

    Returns:
        The index object returned by ``VectorStoreIndex.from_documents``.
    """
    # NOTE(review): every call runs ``from_documents`` against ``vector_store``;
    # re-running the cell presumably re-ingests the document -- verify whether
    # that creates duplicate entries in the backing store.

    # Parser that splits the document into sentence-window nodes.
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )

    # Bundle the LLM, embedding model and parser used while building the index.
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )

    # Storage context that routes the index's vectors to ``vector_store``.
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    return VectorStoreIndex.from_documents(
        [document],
        service_context=sentence_context,
        storage_context=storage_context,
    )



In [13]:
def get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k=6,
    rerank_top_n=2,
):
    """Create a query engine on top of a sentence-window index.

    Args:
        sentence_index: Index whose ``as_query_engine`` method is called.
        similarity_top_k: Forwarded to ``as_query_engine`` as
            ``similarity_top_k`` (how many similar results to retrieve).
        rerank_top_n: Forwarded as ``top_n`` to ``SentenceTransformerRerank``.
            (Per the llama_index docs this is the number of nodes kept after
            reranking -- confirm against the installed version.)

    Returns:
        The query engine returned by ``sentence_index.as_query_engine``.
    """
    # Postprocessors are listed in this order: first the metadata replacement
    # keyed on "window", then the "BAAI/bge-reranker-base" reranker.
    node_postprocessors = [
        MetadataReplacementPostProcessor(target_metadata_key="window"),
        SentenceTransformerRerank(
            model="BAAI/bge-reranker-base",
            top_n=rerank_top_n,
        ),
    ]

    return sentence_index.as_query_engine(
        node_postprocessors=node_postprocessors,
        similarity_top_k=similarity_top_k,
    )

> The reranker uses a sentence-transformer model to compute new relevance scores for the `similarity_top_k` results returned by the initial vector search, sorts them by those scores, and keeps only the top `rerank_top_n`. This can help improve the relevance of the results by taking into account more complex semantic similarities that might not be captured by the initial ranking.



In [40]:
# Index the combined document into the Elasticsearch-backed vector store.
sentence_index = build_sentence_window_index(
    document,
    llm,
    vector_store=es_store,
    embed_model="local:BAAI/bge-small-en-v1.5",
)

# Query engine over that index; rerank_top_n is raised from its default of 2
# to 5, while similarity_top_k keeps its default of 6.
query_engine = get_sentence_window_query_engine(
    sentence_index=sentence_index,
    rerank_top_n=5,
)


### Prompt Templates

In [None]:
# Print the default text-QA and refine prompt templates exposed by the query
# engine, each preceded by a line of 20 '#' characters.
print("Prompt Templates:\n")
for prompt_key in (
    "response_synthesizer:text_qa_template",
    "response_synthesizer:refine_template",
):
    print("#" * 20)
    print(query_engine.get_prompts()[prompt_key].default_template.template)

### Response

In [75]:
# Send a sample question through the sentence-window query engine and print
# the response.
resp = query_engine.query("There is no manager option in gospot account")
print(resp)


If your company has not purchased Role-based Access as an addition to your GoSpotCheck Account, you will not see the option for the Manager role in your account. To find out more about setting up roles and access levels, please contact your designated GoSpotCheck Customer Success Manager.


In [None]:
# Inspect the first source node of the response: print the text stored under
# the "window" metadata key (the key configured on the node parser) ...
print(resp.source_nodes[0].node.metadata["window"])
print("-"*30)
# ... followed by the text stored under the "original_text" metadata key.
print(resp.source_nodes[0].node.metadata["original_text"])


### Source documents

In [None]:
# Print the text of every source node attached to the response, each followed
# by a line of 20 dashes.
for source_node in resp.source_nodes:
    print(source_node.node.text, "-" * 20, sep="\n")