In [1]:
import llama_index
llama_index.set_global_handler("simple")

In [2]:
import os

os.environ["OPENAI_API_KEY"] = "sk-..."

import logging
import sys

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output

from llama_index import (
    KnowledgeGraphIndex,
    ServiceContext,
    SimpleDirectoryReader,
    SimpleKeywordTableIndex,
    VectorStoreIndex
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore
from llama_index.llms import OpenAI

from IPython.display import Markdown, display
from llama_index.llms.palm import PaLM
from llama_index.embeddings import GooglePaLMEmbedding


from llama_index.callbacks import (
    CallbackManager,
    LlamaDebugHandler
)


from llama_index.retrievers import (
    KeywordTableSimpleRetriever
)


In [3]:
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])
palm_api_key  = "AIzaSyApBCzqW_RF4qbkX9kMoNwjooIqrm8oZEQ"
model = PaLM(api_key=palm_api_key)

model_name = "models/embedding-gecko-001"
embed_model = GooglePaLMEmbedding(model_name=model_name, api_key=palm_api_key)

service_context = ServiceContext.from_defaults(
                                    llm = model,
                                    embed_model = embed_model,
                                    chunk_size=512,
                                    callback_manager=callback_manager)

In [4]:
from llama_index.postprocessor import (
    SimilarityPostprocessor,
    CohereRerank,
)
from llama_index.schema import Node, NodeWithScore
from llama_index.postprocessor import LongContextReorder

nodes = [
    NodeWithScore(node=Node(text="Climate change is the issue"), score=0.7),
    NodeWithScore(node=Node(text="Social meida is the issue"), score=0.8),
]

# similarity postprocessor: filter nodes below 0.75 similarity score
processor = SimilarityPostprocessor(similarity_cutoff=0.75)
filtered_nodes = processor.postprocess_nodes(nodes)

In [5]:
filtered_nodes

[NodeWithScore(node=TextNode(id_='32e1fa04-c5a4-49c7-aa63-9dbf3fa8a280', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='820020e3305b080c7e14c7f32f612001e5a5be9de6cd58edb9cd031c8afd04c5', text='Social meida is the issue', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.8)]

In [6]:
# cohere rerank: rerank nodes given query using trained model
reranker = CohereRerank(api_key="JiiME47SI8jHKTEzj3iK0eEEPTnU7h7a7Vk9biuk", top_n=2)
reranker.postprocess_nodes(nodes, query_str="What is social meia")

[NodeWithScore(node=TextNode(id_='32e1fa04-c5a4-49c7-aa63-9dbf3fa8a280', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='820020e3305b080c7e14c7f32f612001e5a5be9de6cd58edb9cd031c8afd04c5', text='Social meida is the issue', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.7831604),
 NodeWithScore(node=TextNode(id_='53c7ce03-243a-4ba8-8dcd-1dbc37e7473d', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='ae14ecbf22f7e62001b303bbe72ca9eeaa8fed928f2a9c1bd2f75c6e5ebb232d', text='Climate change is the issue', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.018869255)]

In [7]:
from llama_index.schema import IndexNode, TextNode
storage_context = StorageContext.from_defaults()

nodes = [
    TextNode(text="Climate change is the issue increasing water melting glasiar"),
    TextNode(text="Climate change melting ice increasing water "),
    TextNode(text="Social meida is the issue"),
    TextNode(text="climate change is causing Social meida is the issue"),
]

In [8]:
nodes

[TextNode(id_='84541188-4135-4db6-8ed0-c2002c9d1d88', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='628d247ce40a21d6fb167cd1cf4210c407211d3146e01cd0cdef4b81a3ae84ac', text='Climate change is the issue increasing water melting glasiar', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 TextNode(id_='a075971a-2cfb-402d-b6a7-e87b8ea3a9ec', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='dce791da8765d9f0d53daa1b26c4c24115061b930013ea21b8a210a296c4242e', text='Climate change melting ice increasing water ', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 TextNode(id_='c062c267-59f8-4dc5-a9bc-d8e6a199d3d4', embedding=None, metadata={}, excluded_embed_metadata_k

In [9]:
index = VectorStoreIndex(nodes = nodes,
                         service_context = service_context,
                        storage_context= storage_context)

query_engine = index.as_query_engine(
    node_postprocessors=[reranker],
    verbose = True
)

# all node post-processors will be applied during each query
response = query_engine.query("climate chnage melting isuue")


**********
Trace: index_construction
    |_embedding ->  1.834371 seconds
**********
** Prompt: **
Context information is below.
---------------------
Climate change melting ice increasing water

Climate change is the issue increasing water melting glasiar
---------------------
Given the context information and not prior knowledge, answer the query.
Query: climate chnage melting isuue
Answer: 
**************************************************
** Completion: **
increasing water
**************************************************


**********
Trace: query
    |_query ->  1.788564 seconds
      |_retrieve ->  0.487139 seconds
        |_embedding ->  0.485148 seconds
      |_reranking ->  0.40471 seconds
      |_synthesize ->  0.896715 seconds
        |_templating ->  0.0 seconds
        |_llm ->  0.888773 seconds
**********


In [10]:
response

Response(response='increasing water', source_nodes=[NodeWithScore(node=TextNode(id_='a075971a-2cfb-402d-b6a7-e87b8ea3a9ec', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='dce791da8765d9f0d53daa1b26c4c24115061b930013ea21b8a210a296c4242e', text='Climate change melting ice increasing water ', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.18082882), NodeWithScore(node=TextNode(id_='84541188-4135-4db6-8ed0-c2002c9d1d88', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='628d247ce40a21d6fb167cd1cf4210c407211d3146e01cd0cdef4b81a3ae84ac', text='Climate change is the issue increasing water melting glasiar', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), s

In [11]:
index.as_retriever().retrieve("climate chnage melting isuue")

**********
Trace: query
    |_retrieve ->  0.634157 seconds
      |_embedding ->  0.632136 seconds
**********


[NodeWithScore(node=TextNode(id_='84541188-4135-4db6-8ed0-c2002c9d1d88', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='628d247ce40a21d6fb167cd1cf4210c407211d3146e01cd0cdef4b81a3ae84ac', text='Climate change is the issue increasing water melting glasiar', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.8336109016073495),
 NodeWithScore(node=TextNode(id_='a075971a-2cfb-402d-b6a7-e87b8ea3a9ec', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='dce791da8765d9f0d53daa1b26c4c24115061b930013ea21b8a210a296c4242e', text='Climate change melting ice increasing water ', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.7955372667616035)]

In [12]:
score_nodes = [
    NodeWithScore(node=Node(text="Climate change is the issue"), score=0.7),
    NodeWithScore(node=Node(text="Climate change Social meida is the issue"), score=0.8),
]


postprocessor = LongContextReorder()

postprocessor.postprocess_nodes(score_nodes)

[NodeWithScore(node=TextNode(id_='45162b33-63a4-498a-b4e1-fdcb06ee06a4', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='ae14ecbf22f7e62001b303bbe72ca9eeaa8fed928f2a9c1bd2f75c6e5ebb232d', text='Climate change is the issue', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.7),
 NodeWithScore(node=TextNode(id_='84829d82-3fc5-44aa-bf2d-d15c686ab53d', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='8b122c8e36e4c9163a7c1cfe2e9121f52c532679f10fe9a06c529422b7950604', text='Climate change Social meida is the issue', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.8)]

In [13]:
from llama_index.postprocessor import KeywordNodePostprocessor

postprocessor = KeywordNodePostprocessor(
    required_keywords=["Climate", "ice"], exclude_keywords=["Social"]
)

postprocessor.postprocess_nodes(score_nodes)

[NodeWithScore(node=TextNode(id_='45162b33-63a4-498a-b4e1-fdcb06ee06a4', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='ae14ecbf22f7e62001b303bbe72ca9eeaa8fed928f2a9c1bd2f75c6e5ebb232d', text='Climate change is the issue', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.7)]

## SentenceEMbedding Optimizer

filter sentences those are not relevent to query remove those

In [14]:
from llama_index.postprocessor import SentenceEmbeddingOptimizer


score_nodes = [
    NodeWithScore(node=Node(text="Climate change is the issue. Social media is the another issue"), score=0.7),
    NodeWithScore(node=Node(text="Climate change Social meida is the issue"), score=0.8),
]

postprocessor = SentenceEmbeddingOptimizer(
    embed_model=service_context.embed_model,
    percentile_cutoff=0.5,
    # threshold_cutoff=0.7
)

postprocessor.postprocess_nodes(score_nodes)

[NodeWithScore(node=TextNode(id_='b58df178-2db0-44a2-97cf-8c2b2a809023', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='63ae0dfe6617f7d7ea00f57a211e0b3455df166461926861fd34da0d55cd62ef', text='Climate change is the issue. Social media is the another issue', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.7),
 NodeWithScore(node=TextNode(id_='6442f981-a25b-44d5-a6ae-ee2948387cae', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='8b122c8e36e4c9163a7c1cfe2e9121f52c532679f10fe9a06c529422b7950604', text='Climate change Social meida is the issue', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.8)]

In [15]:
from llama_index import download_loader

WikipediaReader = download_loader("WikipediaReader")

loader = WikipediaReader()
documents = loader.load_data(pages=["Berlin"])

In [16]:
from llama_index import VectorStoreIndex

index = VectorStoreIndex.from_documents(documents, service_context =service_context)

**********
Trace: index_construction
    |_node_parsing ->  0.128634 seconds
      |_chunking ->  0.121668 seconds
    |_embedding ->  1.076423 seconds
    |_embedding ->  1.078088 seconds
    |_embedding ->  1.079149 seconds
    |_embedding ->  1.1416 seconds
    |_embedding ->  0.853206 seconds
**********


In [17]:
import time
from llama_index import VectorStoreIndex
from llama_index.postprocessor import SentenceEmbeddingOptimizer

print("Without optimization")
start_time = time.time()
query_engine = index.as_query_engine()
res = query_engine.query("What is the population of Berlin?")
end_time = time.time()
print("Total time elapsed: {}".format(end_time - start_time))
print("Answer: {}".format(res))


Without optimization
** Prompt: **
Context information is below.
---------------------
It is the only observation tower which stands on insulators and has a restaurant 55 m (180 ft) and an observation deck 126 m (413 ft) above ground, which is reachable by a windowed elevator.
The Oberbaumbrücke over the Spree river is Berlin's most iconic bridge, connecting the now-combined boroughs of Friedrichshain and Kreuzberg. It carries vehicles, pedestrians, and the U1 Berlin U-Bahn line. The bridge was completed in a brick gothic style in 1896, replacing the former wooden bridge with an upper deck for the U-Bahn. The center portion was demolished in 1945 to stop the Red Army from crossing. After the war, the repaired bridge served as a checkpoint and border crossing between the Soviet and American sectors, and later between East and West Berlin. In the mid-1950s, it was closed to vehicles, and after the construction of the Berlin Wall in 1961, pedestrian traffic was heavily restricted. Followi

In [18]:
index.as_retriever().retrieve("What is the population of Berlin?")

**********
Trace: query
    |_retrieve ->  0.49027 seconds
      |_embedding ->  0.482214 seconds
**********


[NodeWithScore(node=TextNode(id_='8331e4a4-4d3a-4961-9e04-fe89eda4af49', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='475de4bd-7590-4575-b3e0-ea59a2f42133', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='bbb4fccdec26619010310ee9c02992467d2776ed6aa023471e221f1b2e014017'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='ab245bbb-442d-44e0-81db-4c01f3d38c3c', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='adcf83701cbab594d34b922999fa4554f92b7cebdf7d1a3139ba7c7595a8000f'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='96324d2b-426e-48ad-8c36-e76cfb94feb4', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='2692e6a36a01815fec2b6f5bbbcc01bbde53b31ac89923d01ed3804543317676')}, hash='fc3620949bd21b2edc1b58af675b488c38bbcd60978dbf2ddaf0dbad38a9a823', text="It is the only observation tower which stands on insulators and has a restaurant 55 

In [19]:
res.source_nodes

[NodeWithScore(node=TextNode(id_='8331e4a4-4d3a-4961-9e04-fe89eda4af49', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='475de4bd-7590-4575-b3e0-ea59a2f42133', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='bbb4fccdec26619010310ee9c02992467d2776ed6aa023471e221f1b2e014017'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='ab245bbb-442d-44e0-81db-4c01f3d38c3c', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='adcf83701cbab594d34b922999fa4554f92b7cebdf7d1a3139ba7c7595a8000f'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='96324d2b-426e-48ad-8c36-e76cfb94feb4', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='2692e6a36a01815fec2b6f5bbbcc01bbde53b31ac89923d01ed3804543317676')}, hash='fc3620949bd21b2edc1b58af675b488c38bbcd60978dbf2ddaf0dbad38a9a823', text="It is the only observation tower which stands on insulators and has a restaurant 55 

In [20]:
print("With optimization")
start_time = time.time()
query_engine = index.as_query_engine(
    node_postprocessors=[SentenceEmbeddingOptimizer(percentile_cutoff=0.5, context_before = False, context_after = False, embed_model= embed_model)]
)
res = query_engine.query("What is the population of Berlin?")
end_time = time.time()
print("Total time elapsed: {}".format(end_time - start_time))
print("Answer: {}".format(res))

With optimization
** Prompt: **
Context information is below.
---------------------
== Demographics ==

At the end of 2018, the city-state of Berlin had 3.75 million registered inhabitants in an area of 891.1 km2 (344.1 sq mi). In 2019, the urban area of Berlin had about 4.5 million inhabitants. Berlin is the most populous city proper in the European Union. The entire Berlin-Brandenburg capital region has a population of more than 6 million in an area of 30,546 km2 (11,794 sq mi). More than 337,000 families with children under the age of 18 lived in Berlin. In 2014, the city-state Berlin had 37,368 live births (+6.6%), a record number since 1991. In 2014, the German capital registered a migration surplus of approximately 40,000 people. The city's population density was 4,206 inhabitants per km2. Almost 2.0 million households were counted in the city. The Oberbaumbrücke over the Spree river is Berlin's most iconic bridge, connecting the now-combined boroughs of Friedrichshain and Kreuzb

In [21]:
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[reranker],
)
response = query_engine.query(
    "What is the population of Berlin?",
)

** Prompt: **
Context information is below.
---------------------
Berlin ( bur-LIN, German: [bɛʁˈliːn] ) is the capital and largest city of Germany by both area and population. Its more than 3.85 million inhabitants make it the European Union's most populous city, according to population within city limits. The greater urban area of Berlin is one of the States of Germany. Berlin is surrounded by the State of Brandenburg and Brandenburg's capital Potsdam is nearby. Berlin's urban area has a population of around 4.5 million and is therefore the most populous urban area in Germany. The Berlin-Brandenburg capital region has around 6.2 million inhabitants and is Germany's second-largest metropolitan region after the Rhine-Ruhr region and the sixth biggest Metropolitan Region by GDP in the European Union.Berlin was built along the banks of the Spree river, which flows into the Havel in the western borough of Spandau. The city incorporates lakes in the western and southeastern boroughs, the l

In [25]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.postprocessor import (
    FixedRecencyPostprocessor,
    EmbeddingRecencyPostprocessor,
)
from llama_index.text_splitter import SentenceSplitter
from llama_index.storage.docstore import SimpleDocumentStore
from llama_index.response.notebook_utils import display_response

In [26]:
# load documents
from llama_index.storage.storage_context import StorageContext


def get_file_metadata(file_name: str):
    """Get file metadata."""
    if "v1" in file_name:
        return {"date": "2020-01-01"}
    elif "v2" in file_name:
        return {"date": "2020-02-03"}
    elif "v3" in file_name:
        return {"date": "2022-04-12"}
    else:
        raise ValueError("invalid file")


documents = SimpleDirectoryReader(
    input_files=[
        "test_versioned_data/paul_graham_essay_v1.txt",
        "test_versioned_data/paul_graham_essay_v2.txt",
        "test_versioned_data/paul_graham_essay_v3.txt",
    ],
    file_metadata=get_file_metadata,
).load_data()

# define service context (wrapper container around current classes)
text_splitter = SentenceSplitter(chunk_size=512)
service_context = ServiceContext.from_defaults(text_splitter=text_splitter)

# use node parser to parse into nodes
nodes = text_splitter.get_nodes_from_documents(documents)

# add to docstore
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)

storage_context = StorageContext.from_defaults(docstore=docstore)

ValueError: File test_versioned_data/paul_graham_essay_v1.txt does not exist.

**********
Trace: index_construction
    |_node_parsing ->  0.112767 seconds
      |_chunking ->  0.097766 seconds
    |_embedding ->  2.442238 seconds
    |_embedding ->  1.024255 seconds
    |_embedding ->  1.120393 seconds
    |_embedding ->  1.051535 seconds
    |_embedding ->  0.927631 seconds
**********


In [31]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.postprocessor import (
    PrevNextNodePostprocessor,
    AutoPrevNextNodePostprocessor,
)
from llama_index.node_parser import SentenceSplitter
from llama_index.storage.docstore import SimpleDocumentStore


In [34]:
# load documents
from llama_index.storage.storage_context import StorageContext

# use node parser in service context to parse into nodes
nodes = service_context.node_parser.get_nodes_from_documents(documents)

# add to docstore
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)

storage_context = StorageContext.from_defaults(docstore=docstore)

index = VectorStoreIndex.from_documents(documents, storage_context=storage_context, service_context= service_context)

**********
Trace: index_construction
    |_node_parsing ->  0.090101 seconds
      |_chunking ->  0.08411 seconds
    |_embedding ->  1.038148 seconds
    |_embedding ->  1.158727 seconds
    |_embedding ->  1.061991 seconds
    |_embedding ->  1.122162 seconds
    |_embedding ->  0.854365 seconds
**********


In [35]:
node_postprocessor = PrevNextNodePostprocessor(docstore=docstore, num_nodes=4)


In [36]:
query_engine = index.as_query_engine(
    similarity_top_k=1,
    node_postprocessors=[node_postprocessor],
    response_mode="tree_summarize",
)
response = query_engine.query(
    "art and culture of berlin",
)

** Prompt: **
Context information from multiple sources is below.
---------------------
The Brücke Museum features one of the largest collection of works by artist of the early 20th-century expressionist movement. In Lichtenberg, on the grounds of the former East German Ministry for State Security, is the Stasi Museum. The site of Checkpoint Charlie, one of the most renowned crossing points of the Berlin Wall, is still preserved. A private museum venture exhibits a comprehensive documentation of detailed plans and strategies devised by people who tried to flee from the East.
The Beate Uhse Erotic Museum claimed to be the largest erotic museum in the world until it closed in 2014.The cityscape of Berlin displays large quantities of urban street art. It has become a significant part of the city's cultural heritage and has its roots in the graffiti scene of Kreuzberg of the 1980s. The Berlin Wall itself has become one of the largest open-air canvasses in the world. The leftover stretch al

Trace: query
    |_query ->  8.980868 seconds
      |_retrieve ->  0.507406 seconds
        |_embedding ->  0.487349 seconds
      |_synthesize ->  8.472454 seconds
        |_templating ->  0.0 seconds
        |_llm ->  8.462205 seconds
**********


In [41]:
response.source_nodes

[NodeWithScore(node=TextNode(id_='dce60e39-d131-436f-8054-c96675f3dcbc', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='475de4bd-7590-4575-b3e0-ea59a2f42133', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='bbb4fccdec26619010310ee9c02992467d2776ed6aa023471e221f1b2e014017'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='a52a6cbd-640f-43c4-af4d-db3ff1ad5a9d', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='7e530a7b3027c52f9ac4df9d10f59cde93c2589df9f1751ba4000c36ac18ede1'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='1824b7d8-fb19-40f5-9f2a-6ed6106f5064', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='6ce867efd632efba51f308020704db1a6ba52b8efb37839bf232458caf56e088')}, hash='373a789791357c2d2c343020e4aee75f8e868b6ae073fe317086eda6f080279d', text="The Brücke Museum features one of the largest collection of works by artist of the e

In [42]:
# Try querying index without node postprocessor
query_engine = index.as_query_engine(
    similarity_top_k=1, response_mode="tree_summarize"
)
response = query_engine.query(
    "art and culture of berlin",
)

** Prompt: **
Context information from multiple sources is below.
---------------------
The Brücke Museum features one of the largest collection of works by artist of the early 20th-century expressionist movement. In Lichtenberg, on the grounds of the former East German Ministry for State Security, is the Stasi Museum. The site of Checkpoint Charlie, one of the most renowned crossing points of the Berlin Wall, is still preserved. A private museum venture exhibits a comprehensive documentation of detailed plans and strategies devised by people who tried to flee from the East.
The Beate Uhse Erotic Museum claimed to be the largest erotic museum in the world until it closed in 2014.The cityscape of Berlin displays large quantities of urban street art. It has become a significant part of the city's cultural heritage and has its roots in the graffiti scene of Kreuzberg of the 1980s. The Berlin Wall itself has become one of the largest open-air canvasses in the world. The leftover stretch al

In [44]:
response.source_nodes[0].text

"The Brücke Museum features one of the largest collection of works by artist of the early 20th-century expressionist movement. In Lichtenberg, on the grounds of the former East German Ministry for State Security, is the Stasi Museum. The site of Checkpoint Charlie, one of the most renowned crossing points of the Berlin Wall, is still preserved. A private museum venture exhibits a comprehensive documentation of detailed plans and strategies devised by people who tried to flee from the East.\nThe Beate Uhse Erotic Museum claimed to be the largest erotic museum in the world until it closed in 2014.The cityscape of Berlin displays large quantities of urban street art. It has become a significant part of the city's cultural heritage and has its roots in the graffiti scene of Kreuzberg of the 1980s. The Berlin Wall itself has become one of the largest open-air canvasses in the world. The leftover stretch along the Spree river in Friedrichshain remains as the East Side Gallery. Berlin today i

In [46]:
node_postprocessor = AutoPrevNextNodePostprocessor(
    docstore=docstore,
    num_nodes=3,
    service_context=service_context,
    verbose=True,
)

In [48]:
# Infer that we need to search nodes after current one
query_engine = index.as_query_engine(
    similarity_top_k=1,
    node_postprocessors=[node_postprocessor],
    response_mode="tree_summarize",
)


response = query_engine.query(
    "art and culture of berlin",
)

** Prompt: **
Context information from multiple sources is below.
---------------------
The Brücke Museum features one of the largest collection of works by artist of the early 20th-century expressionist movement. In Lichtenberg, on the grounds of the former East German Ministry for State Security, is the Stasi Museum. The site of Checkpoint Charlie, one of the most renowned crossing points of the Berlin Wall, is still preserved. A private museum venture exhibits a comprehensive documentation of detailed plans and strategies devised by people who tried to flee from the East.
The Beate Uhse Erotic Museum claimed to be the largest erotic museum in the world until it closed in 2014.The cityscape of Berlin displays large quantities of urban street art. It has become a significant part of the city's cultural heritage and has its roots in the graffiti scene of Kreuzberg of the 1980s. The Berlin Wall itself has become one of the largest open-air canvasses in the world. The leftover stretch al

ValueError: Invalid prediction: Berlin has a rich art and culture scene. It is home to many museums, galleries, and other cultural institutions. The city is also known for its nightlife, which is one of the most diverse and vibrant in the world.

In [49]:
from llama_index.postprocessor import LongContextReorder

reorder = LongContextReorder()

reorder_engine = index.as_query_engine(
    node_postprocessors=[reorder], similarity_top_k=5
)
base_engine = index.as_query_engine(similarity_top_k=5)

In [53]:
from llama_index.response.notebook_utils import display_response

base_response = index.as_retriever(similarity_top_k=5).retrieve("Did the author meet Sam Altman?")
base_response

**********
Trace: query
    |_retrieve ->  0.494322 seconds
      |_embedding ->  0.483324 seconds
**********


[NodeWithScore(node=TextNode(id_='e6b56b43-e05f-42fd-a21d-87e298e39aa0', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='475de4bd-7590-4575-b3e0-ea59a2f42133', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='bbb4fccdec26619010310ee9c02992467d2776ed6aa023471e221f1b2e014017'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='6cc044bd-53a5-4149-a284-66f834a131c2', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='7299e963019cd042c018bac2b0203bdf946d2534c61bc266cc7cdf20a8035c06'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='7ef7ed67-3890-40a1-a3b0-dff7023d4a02', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='292bd42f12a68eb57141ad1a4e9175d66639c73ec05feb2a0b401377accdb5f3')}, hash='ebe04da3c6ebc6f5a1fd54af697fc397d11d6aa303a64dc94ba35d4fb6fc9796', text="Due to the fall in passenger numbers resulting from the COVID-19 pandemic, plans wer

In [55]:
for i in base_response:
    print(i.node.id_, i.score)

e6b56b43-e05f-42fd-a21d-87e298e39aa0 0.5681839492069704
58846829-0505-4f5e-8112-a48969a03132 0.5618988652006635
85d5fbf3-4fbb-48cd-a605-76637a40f86f 0.5587181662488196
a0f52ae3-7d3e-4fd2-ad38-dae2350e096d 0.5521701557402982
554a2114-b687-4856-b637-df0579227b68 0.5495177706280489


In [57]:
rea = base_engine.query("Did the author meet Sam Altman?")

**********
Trace: query
    |_query ->  3.579554 seconds
      |_retrieve ->  0.492163 seconds
        |_embedding ->  0.486166 seconds
      |_synthesize ->  3.087391 seconds
        |_templating ->  0.0 seconds
        |_llm ->  0.0 seconds
**********


IndexError: list index out of range