In [1]:
import sys
import os
import openai
from openai import OpenAI
from dotenv import load_dotenv, find_dotenv

# Add the directory two levels above the current working directory to the
# import path so modules located there can be imported from this notebook.
sys.path.append("../..")

# find_dotenv() looks up a .env file and load_dotenv() loads it into the
# process environment; the return value is bound to `_` and not used.
_ = load_dotenv(find_dotenv())  # read local .env file

# Indexing os.environ directly raises KeyError right here if OPENAI_API_KEY is
# not set, instead of failing later on the first API call.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [2]:
from llama_index.node_parser import SentenceWindowNodeParser

# Create the sentence window node parser.
# The two metadata keys chosen here are the keys that show up in each node's
# metadata ("window" and "original_text"); the query-engine helper further down
# reads the "window" key, so the names must stay in sync.
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

In [3]:
from llama_index import Document

# Free-text description of a single interior photo, organised as labelled
# sections (ROOM, FURNITURE, COLOR, ...).
# NOTE(review): the variable name mentions a pool table, but the text describes
# a living room / lounge area — consider renaming once all usages are known.
pool_table_desc = """ROOM: The picture is taken in a living room or a lounge area.

FURNITURE: There is a gray upholstered sofa with decorative pillows, a round coffee table with a metal frame and a wooden top, and a patterned armchair to the right.

COLOR: The predominant colors in the picture are neutral tones, including shades of gray, beige, and brown.

ELEMENTS: The design elements visible include a mix of geometric and organic patterns on the pillows and armchair, as well as a clean, contemporary style in the furniture design.

ACCENTS: Accent pieces include a white vase with white artificial flowers on the coffee table, a stack of books, and a woven basket with a fringed throw blanket.

TEXTURES: Visible textures include the fabric of the sofa and armchair, the smooth surface of the coffee table, the woven texture of the basket, and the plush carpet.

WINDOWS: There are no window coverings visible in the picture.

LIGHTING: The lighting is not directly visible, but the room appears to be lit by ambient light, possibly from a source outside the frame.

FLOORING: The floor is covered with a light-colored plush carpet.
"""

# Wrap the description in a Document so it can be parsed and indexed.
document = Document(text=pool_table_desc)
# The image URL is not part of the description text; it is attached as
# metadata so it travels with the document.
document.metadata = {
    "url": "https://deborahrucci.decoratingden.com/wp-content/uploads/sites/280/2019/10/0908O2-19.jpg"
}

# Split the document into nodes with the sentence-window parser created above.
nodes = node_parser.get_nodes_from_documents([document])

In [4]:
# Confirm that the URL metadata is attached to the document.
document.metadata

{'url': 'https://deborahrucci.decoratingden.com/wp-content/uploads/sites/280/2019/10/0908O2-19.jpg'}

In [6]:
# Number of nodes the sentence-window parser produced from the single document.
len(nodes)

9

In [5]:
# Metadata of the first node: the parser's "window" and "original_text"
# entries alongside the "url" that was set on the document.
nodes[0].metadata

{'window': 'ROOM: The picture is taken in a living room or a lounge area.\n\n FURNITURE: There is a gray upholstered sofa with decorative pillows, a round coffee table with a metal frame and a wooden top, and a patterned armchair to the right.\n\n COLOR: The predominant colors in the picture are neutral tones, including shades of gray, beige, and brown.\n\n',
 'original_text': 'ROOM: The picture is taken in a living room or a lounge area.\n\n',
 'url': 'https://deborahrucci.decoratingden.com/wp-content/uploads/sites/280/2019/10/0908O2-19.jpg'}

In [7]:
import os
from llama_index import VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.indices.postprocessor import SentenceTransformerRerank


def get_or_create_vector_store_index(
    document, service_context, persist_dir="./sentence_index"
):
    """Return the vector index stored in ``persist_dir``, creating it if needed.

    If ``persist_dir`` does not exist, a new index is built from ``document``
    and persisted there. Otherwise the stored index is loaded and ``document``
    is added to it, unless a document with the same id is already tracked.
    An insertion is written back to ``persist_dir`` so that it is still
    present the next time the index is loaded.

    Args:
        document: Document to index; its ``doc_id`` is used to detect whether
            it is already part of a loaded index.
        service_context: Passed through to index construction and loading.
        persist_dir: Directory the index is persisted to and loaded from.
            Defaults to the previously hard-coded ``"./sentence_index"``.

    Returns:
        The newly built or loaded index.
    """
    if not os.path.exists(persist_dir):
        # First use: build the index from the document and store it on disk.
        sentence_index = VectorStoreIndex.from_documents(
            [document], service_context=service_context
        )
        sentence_index.storage_context.persist(persist_dir=persist_dir)
        return sentence_index

    # The index already exists on disk: load it instead of rebuilding.
    sentence_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=persist_dir),
        service_context=service_context,
    )

    # Insert only when this document id is unknown to the index; an
    # unconditional insert would index the same Document a second time when
    # the function is called again with it.
    if document.doc_id not in sentence_index.ref_doc_info:
        sentence_index.insert(document)
        # Keep the on-disk copy in sync, mirroring the create branch.
        sentence_index.storage_context.persist(persist_dir=persist_dir)

    return sentence_index


def build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index",
    window_size=3,
):
    """Build the sentence-window index for ``document``, or load it from disk.

    A sentence-window node parser and a service context (LLM, embedding model,
    parser) are created first. If ``save_dir`` does not exist, the index is
    built from ``document`` and persisted there. If it does exist, the stored
    index is loaded; ``document`` is not used in that branch, so an existing
    index is returned as stored.

    Args:
        document: Document to index when no stored index exists yet.
        llm: LLM handed to the service context.
        embed_model: Embedding model specifier handed to the service context.
        save_dir: Directory the index is persisted to / loaded from.
        window_size: ``window_size`` passed to the sentence-window parser.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        The newly built or loaded index.
    """
    # Imported locally: the cell that defines this function does not import
    # ServiceContext (the notebook only imports it in a later cell), so the
    # function would otherwise depend on that later cell having been run.
    from llama_index import ServiceContext

    # The metadata keys match what the query-engine helper expects ("window").
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )
    if not os.path.exists(save_dir):
        # No stored index yet: build it and write it to save_dir.
        sentence_index = VectorStoreIndex.from_documents(
            [document], service_context=sentence_context
        )
        sentence_index.storage_context.persist(persist_dir=save_dir)
    else:
        # Reuse the stored index rather than re-embedding the document.
        sentence_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=sentence_context,
        )

    return sentence_index


def get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k=6,
    rerank_top_n=2,
):
    """Create a query engine for a sentence-window index.

    Two node postprocessors are attached, in this order: a metadata
    replacement step that reads the "window" metadata key, followed by a
    sentence-transformer reranker ("BAAI/bge-reranker-base") limited to
    ``rerank_top_n`` results.

    Args:
        sentence_index: Index whose ``as_query_engine`` method is called.
        similarity_top_k: Forwarded to ``as_query_engine``.
        rerank_top_n: ``top_n`` given to the reranker.

    Returns:
        Whatever ``sentence_index.as_query_engine`` returns.
    """
    # Order matters: the window replacement is listed before the reranker.
    node_postprocessors = [
        MetadataReplacementPostProcessor(target_metadata_key="window"),
        SentenceTransformerRerank(
            top_n=rerank_top_n, model="BAAI/bge-reranker-base"
        ),
    ]
    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=node_postprocessors,
    )

In [8]:
from llama_index import ServiceContext
from llama_index.llms import OpenAI

# NOTE: `OpenAI` here is the llama_index LLM class imported in this cell; it
# shadows the `OpenAI` client imported from the `openai` package in the first
# cell of the notebook.
llm = OpenAI(temperature=0, model="gpt-4")

# build_sentence_window_index() creates the service context (LLM, embedding
# model, sentence-window parser) itself, then either builds and persists the
# index under save_dir or loads the one already stored there. The embedding
# model is overridden here; the helper's default is the bge-small variant.
index = build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-large-en-v1.5",
    save_dir="./sentence_index",
)

In [16]:
# Add the document only if the index is not already tracking its id. When the
# index was built from this same Document in the current session it is already
# indexed, and re-running this cell would otherwise index it again.
if document.doc_id not in index.ref_doc_info:
    index.insert(document)

In [32]:
# Build the query engine with the helper's defaults
# (similarity_top_k=6, rerank_top_n=2).
sentence_window_engine = get_sentence_window_query_engine(index)

In [18]:
# List the ids of the source documents the index currently tracks.
index.ref_doc_info.keys()

dict_keys(['530f2127-b849-4beb-ac6e-8622dec45c34', 'bef2caca-7be5-48ec-a09a-560846d3162b'])

In [37]:
# Ask for an image by describing its contents. The URL is not part of the
# description text itself; it was attached to the document as metadata.
response = sentence_window_engine.query(
    "please return the URL of an image that includes a coffee table with some books on it"
)

In [38]:
from llama_index.response.notebook_utils import display_response

# Render the answer together with the source nodes it was based on.
display_response(response, show_source=True)

**`Final Response:`** The URL of the image that includes a coffee table with some books on it is https://deborahrucci.decoratingden.com/wp-content/uploads/sites/280/2019/10/0908O2-19.jpg.

---

**`Source Node 1/2`**

**Node ID:** fae12fff-7e4c-4f1b-9683-e339067686c5<br>**Similarity:** 0.5907028317451477<br>**Text:** FURNITURE: There is a gray upholstered sofa with decorative pillows, a round coffee table with a ...<br>

---

**`Source Node 2/2`**

**Node ID:** 03b89337-8a99-4ab1-8c9e-40c7d22e3a50<br>**Similarity:** 0.44890716671943665<br>**Text:** COLOR: The predominant colors in the picture are neutral tones, including shades of gray, beige, ...<br>