# Querying

Querying consists of three main steps:

1. retrieving the relevant nodes
2. post-processing: filtering on keywords or similarity scores
3. response synthesis

In [24]:
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core import VectorStoreIndex
from llama_index.core.schema import MetadataMode
from llama_index.llms.openai import OpenAI

# Rebuild the storage context from the on-disk "./storage" directory, then
# load the index that was persisted there.
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)

In [25]:
# Inspect the index's mapping of source-document IDs to RefDocInfo records
# (each record carries the document's node IDs and its metadata).
index.ref_doc_info

{'f9b3bf49-8018-41ac-9313-6cfc18491216': RefDocInfo(node_ids=['642e947c-4c86-48c6-956a-15fa83b7ac35', '10c9a4d3-2a1b-4fae-bc55-b31082c70aa6', 'b4721a92-cd08-44fc-8e3a-f137d037676d', 'd03763d2-7b94-4032-9321-11a8283cfe25', '7aea43e9-d7db-4d46-898c-c233b3954ce4', 'b994bc38-0b36-4ad5-b1d4-92690462a19b', '9735c4ac-0c08-4d7f-9d2d-32d26892290c', 'c15d950d-0527-4c10-a314-258ec3412740', '4e44d91c-d20b-4caf-8e06-2d28c647fa1d', '98e89a5e-219e-4020-a823-d848dafc5084', '1eb28048-bcab-45fd-8b44-bf75d6beb190', '5ce487d1-f359-4ad6-9884-5a8f908e88b3', '9b49fc59-1679-44d7-b76f-ee6490ef2ff7', '5e07f79e-1d90-445a-9234-34ec5740e1e0', '8498d5fb-093d-436c-ad2b-b6a47d86240a', 'c7f8260f-2d0a-42dd-8b3b-ea365c0ac647', '3e9d169f-e301-486e-8fa1-ce30ccd12d95', '928b1455-9f8d-4a1f-925d-19faa903685c', '1bea3f2d-5011-4f6a-8b8e-71bd2ed805d8', '29526681-8568-4743-b56c-6266f96bc337', '252bbb52-1328-46b1-ad74-b62968484fd1', '43528ae4-90f2-442f-a5f3-37a81c604c3b'], metadata={'file_path': '/Users/mohamedadelabdelhady/works

In [63]:
# Count the nodes that the first ingested document was split into.
# RefDocInfo exposes `node_ids` as an attribute, so take the first value
# straight from the iterator instead of copying all values into a list and
# round-tripping the record through `to_dict()`.
first_ref_doc = next(iter(index.ref_doc_info.values()))
len(first_ref_doc.node_ids)

22

In [26]:
# Build a query engine from the loaded index, passing no custom options.
query_engine = index.as_query_engine()

In [10]:
# Send a question through the query engine and print the response.
question = "Where did the author study?"
response = query_engine.query(question)
print(response)

The author studied in a PhD program in computer science and later attended RISD (Rhode Island School of Design) for the BFA program.


In [42]:
# Number of source nodes attached to the response.
len(response.source_nodes)

2

In [70]:
# Print each source node attached to the response; the recorded output shows
# the node ID, a text excerpt and a score for every node.
for source_node in response.source_nodes:
    print(source_node)

Node ID: d03763d2-7b94-4032-9321-11a8283cfe25
Text: If he even knew about the strange classes I was taking, he never
said anything.  So now I was in a PhD program in computer science, yet
planning to be an artist, yet also genuinely in love with Lisp hacking
and working away at On Lisp. In other words, like many a grad student,
I was working energetically on multiple projects that were not my
the...
Score:  0.835

Node ID: 642e947c-4c86-48c6-956a-15fa83b7ac35
Text: What I Worked On  February 2021  Before college the two main
things I worked on, outside of school, were writing and programming. I
didn't write essays. I wrote what beginning writers were supposed to
write then, and probably still are: short stories. My stories were
awful. They had hardly any plot, just characters with strong feelings,
which I ...
Score:  0.822



In [12]:
# Inspect the prompt templates used by the query engine; the result is a dict
# keyed by component, e.g. 'response_synthesizer:text_qa_template'.
query_engine.get_prompts()

{'response_synthesizer:text_qa_template': SelectorPromptTemplate(metadata={'prompt_type': <PromptType.QUESTION_ANSWER: 'text_qa'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings={}, function_mappings={}, default_template=PromptTemplate(metadata={'prompt_type': <PromptType.QUESTION_ANSWER: 'text_qa'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='Context information is below.\n---------------------\n{context_str}\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: {query_str}\nAnswer: '), conditionals=[(<function is_chat_model at 0x127420160>, ChatPromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, message_templates=[ChatMessage(role=<MessageRole.SYS

### Custom stages

Instead of calling `index.as_query_engine()`, you can also define the query engine more explicitly by assembling its stages (retriever, node postprocessors and response synthesizer) yourself.

In [78]:
from llama_index.core import VectorStoreIndex, get_response_synthesizer, StorageContext, load_index_from_storage
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor, KeywordNodePostprocessor
from llama_index.core.response_synthesizers import ResponseMode

# Load the persisted index from the "./storage" directory.
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context=storage_context)

# Retrieval stage: vector retriever configured with similarity_top_k=5.
retriever = VectorIndexRetriever(index=index, similarity_top_k=5)

# Response-synthesis stage: NO_TEXT mode (the query cell that follows records
# `None` as the printed response).
response_synthesizer = get_response_synthesizer(response_mode=ResponseMode.NO_TEXT)

# Assemble the query engine from the stages above. The post-processing stage
# is a single similarity postprocessor with a cutoff of 0.821
# (presumably nodes scoring below the cutoff are dropped - TODO confirm).
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[
        SimilarityPostprocessor(similarity_cutoff=0.821),
    ],
)

In [79]:
# Send the question through the explicitly assembled query engine.
# The recorded output is `None`, consistent with the response synthesizer
# having been created with ResponseMode.NO_TEXT.
question = "Where did the author study?"
response = query_engine.query(question)
print(response)

None


In [80]:
# Print each source node attached to the response.
# NOTE(review): the loop variable `n` stays bound after the loop and may be
# read by later cells, so it is deliberately not renamed here.
for n in response.source_nodes:
    print(n)

Node ID: d03763d2-7b94-4032-9321-11a8283cfe25
Text: If he even knew about the strange classes I was taking, he never
said anything.  So now I was in a PhD program in computer science, yet
planning to be an artist, yet also genuinely in love with Lisp hacking
and working away at On Lisp. In other words, like many a grad student,
I was working energetically on multiple projects that were not my
the...
Score:  0.835

Node ID: 642e947c-4c86-48c6-956a-15fa83b7ac35
Text: What I Worked On  February 2021  Before college the two main
things I worked on, outside of school, were writing and programming. I
didn't write essays. I wrote what beginning writers were supposed to
write then, and probably still are: short stories. My stories were
awful. They had hardly any plot, just characters with strong feelings,
which I ...
Score:  0.822



In [82]:
# Show the metadata of the last source node of the response. Indexing
# `response.source_nodes` explicitly avoids depending on the loop variable `n`
# leaking out of an earlier cell, which breaks when cells are re-run out of
# order or that loop is edited.
response.source_nodes[-1].metadata

{'file_path': '/Users/mohamedadelabdelhady/workspace/kaggle-sandbox/llm-playground/data/paul_graham_essay.txt',
 'file_name': 'paul_graham_essay.txt',
 'file_type': 'text/plain',
 'file_size': 75041,
 'creation_date': '2024-05-03',
 'last_modified_date': '2024-05-02'}