### Imports

In [1]:
import os
from dotenv import load_dotenv
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)
from llama_index.query_engine import CitationQueryEngine

load_dotenv()

True

### We'll test with chapter 14: Ionic Equilibria

In [2]:
if not os.path.exists("storage"):
    # load the documents and create the index
    documents = SimpleDirectoryReader("data/14").load_data()
    index = VectorStoreIndex.from_documents(documents)
    # persist the index
    index.storage_context.persist()
else:
    # load the index from storage
    storage_context = StorageContext.from_defaults(persist_dir="storage")
    index = load_index_from_storage(storage_context)

In [3]:
query_engine = CitationQueryEngine.from_args(
    index,
    similarity_top_k=3,
    citation_chunk_size=512,
)

### Ask a question on the topic

In [4]:
response = query_engine.query("What are some important conjugate acid-base pair related to foods?")

response

Response(response='One important conjugate acid-base pair related to foods is hypochlorous acid and hypochlorite (HClO/ClO) [1].', source_nodes=[NodeWithScore(node=TextNode(id_='eb5fb9bc-c063-43a5-b626-9b785a19d102', embedding=None, metadata={'page_label': '1', 'file_name': '14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_path': 'data/14/14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_type': 'application/pdf', 'file_size': 445466, 'creation_date': '2023-11-25', 'last_modified_date': '2023-11-09', 'last_accessed_date': '2023-11-25'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4f61b01f-b288

In [5]:
# see the sources that were used to answer the query
response.source_nodes

[NodeWithScore(node=TextNode(id_='eb5fb9bc-c063-43a5-b626-9b785a19d102', embedding=None, metadata={'page_label': '1', 'file_name': '14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_path': 'data/14/14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_type': 'application/pdf', 'file_size': 445466, 'creation_date': '2023-11-25', 'last_modified_date': '2023-11-09', 'last_accessed_date': '2023-11-25'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4f61b01f-b288-4212-83f2-e47a0c1ced91', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': '14.7.1! Foods- From Cleaning and Disi

### Prompt Engineering

In [7]:
query_engine.get_prompts()

{'response_synthesizer:text_qa_template': PromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['context_str', 'query_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template="Please provide an answer based solely on the provided sources. When referencing information from a source, cite the appropriate source(s) using their corresponding numbers. Every answer should include at least one source citation. Only cite a source when you are explicitly referencing it. If none of the sources are helpful, you should indicate that. For example:\nSource 1:\nThe sky is red in the evening and blue in the morning.\nSource 2:\nWater is wet when the sky is red.\nQuery: When is water wet?\nAnswer: Water will be wet when the sky is red [2], which occurs in the evening [1].\nNow it's your turn. Below are several numbered sources of information:\n------\n{context_str}\n------\nQuery: {query_str}\nAnswer: "),
 'response_synthesizer:

In [13]:
from llama_index.prompts import PromptTemplate


qa_prompt_tmpl_str = """
You are a suportive chemistry expert who provides helpful guiding questions to help students understand the reasons behind a correct answer to exam question.
Please provide guiding questions based on the provided question and final answer, using the provided sources to support your guidance.
When referencing information from a source, cite the appropriate source(s) using their corresponding numbers. 
Every answer should include at least one source citation. 
Only cite a source when you are explicitly referencing it. 
If none of the sources are helpful, you should indicate that. 

For example:
Source 1:
The sky is red in the evening and blue in the morning.

Source 2:
Water is wet when the sky is red.

Query:
Question:
When is water wet?

Options:
A) In the morning
B) In the evening
C) In the afternoon
D) In the middle of the night

Answer: 
B) In the evening

Guiding questions:
1. When will water be wet? Water will be wet when the sky is red [2]
2. When will the sky be red? The sky is red in the evening [1].
3. Therefore, when will water be wet? Water will be wet in the evening.

Now it's your turn. Below are several numbered sources of information:
------
{context_str}
------
Query:
{query_str}

Answer: 
"""

prompt_tmpl = PromptTemplate(
    qa_prompt_tmpl_str,
)

query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": prompt_tmpl}
)

In [14]:
response = query_engine.query("""
Question:
What are some important conjugate acid-base pair related to foods?

Options:
A) NaCl
B) HCl
C) HCl0
D) H2SO4

Answer:
C) HCl0
""")
response

Response(response='C) HCl0\n\nGuiding questions:\n1. What is the definition of a conjugate acid-base pair? According to Source 1, a conjugate acid-base pair refers to two species that differ by a proton (H+) [1].\n2. Which of the options is a conjugate acid-base pair? According to Source 1, hypochlorous acid (HClO) and hypochlorite (ClO-) form a conjugate acid-base pair [1].\n3. Therefore, which option is an important conjugate acid-base pair related to foods? The important conjugate acid-base pair related to foods is HClO [1].', source_nodes=[NodeWithScore(node=TextNode(id_='eb5fb9bc-c063-43a5-b626-9b785a19d102', embedding=None, metadata={'page_label': '1', 'file_name': '14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_path': 'data/14/14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_type': 'application/pdf', 'file_size': 445466, 'creation_date': '2023-11-25', 'last_modifie

### Formatting the response

In [25]:
print(response.response)

C) HCl0

Guiding questions:
1. What is the definition of a conjugate acid-base pair? According to Source 1, a conjugate acid-base pair refers to two species that differ by a proton (H+) [1].
2. Which of the options is a conjugate acid-base pair? According to Source 1, hypochlorous acid (HClO) and hypochlorite (ClO-) form a conjugate acid-base pair [1].
3. Therefore, which option is an important conjugate acid-base pair related to foods? The important conjugate acid-base pair related to foods is HClO [1].


In [36]:
print(response.source_nodes)

[NodeWithScore(node=TextNode(id_='eb5fb9bc-c063-43a5-b626-9b785a19d102', embedding=None, metadata={'page_label': '1', 'file_name': '14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_path': 'data/14/14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_type': 'application/pdf', 'file_size': 445466, 'creation_date': '2023-11-25', 'last_modified_date': '2023-11-09', 'last_accessed_date': '2023-11-25'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4f61b01f-b288-4212-83f2-e47a0c1ced91', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': '14.7.1! Foods- From Cleaning and Disi

In [30]:
len(response.source_nodes)

5

In [38]:
for node in response.source_nodes:
    page_label = node.node.metadata['page_label']
    file_name = node.node.metadata['file_name']

    print(f"Page {page_label}")
    print(f"File {file_name}")
    print(f"Url https://github.com/gabyang/tango-ai/tree/main/backend/data/14/{file_name.replace(' ', '%20')}")

Page 1
File 14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf
Url https://github.com/gabyang/tango-ai/tree/main/backend/data/14/14.7.1!%20Foods-%20From%20Cleaning%20and%20Disinfection%20to%20Microbial%20Nutrition%20and%20Protein%20Modification.pdf
Page 1
File 14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf
Url https://github.com/gabyang/tango-ai/tree/main/backend/data/14/14.7.1!%20Foods-%20From%20Cleaning%20and%20Disinfection%20to%20Microbial%20Nutrition%20and%20Protein%20Modification.pdf
Page 1
File 14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf
Url https://github.com/gabyang/tango-ai/tree/main/backend/data/14/14.7.1!%20Foods-%20From%20Cleaning%20and%20Disinfection%20to%20Microbial%20Nutrition%20and%20Protein%20Modification.pdf
Page 5
File 14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf
Ur