### Imports

In [1]:
import os
from dotenv import load_dotenv
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)
from llama_index.multi_modal_llms.openai import OpenAIMultiModal
from llama_index.query_engine import CitationQueryEngine

# Last expression of the imports cell, so its return value is what the cell displays.
# NOTE(review): presumably this loads API credentials from a local .env file into the
# environment for the llama_index / OpenAI calls below — confirm a .env file is expected.
load_dotenv()

True

### We'll test with chapter 14: Ionic Equilibria

In [2]:
# Build the vector index once and reuse it on later runs.
# The persist directory doubles as the cache marker: if it already exists the index
# is loaded from it, otherwise the documents are indexed and the result is saved there.
PERSIST_DIR = "storage"  # where the index is saved to / loaded from
DATA_DIR = "data/14"     # source documents for chapter 14

if not os.path.exists(PERSIST_DIR):
    # First run: read everything under DATA_DIR and build a fresh index from it.
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    index = VectorStoreIndex.from_documents(documents)
    # Persist to the exact directory the check above looks for. Passing it explicitly
    # avoids depending on the library's default persist location matching PERSIST_DIR.
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # Later runs: rebuild the index object from the persisted files.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

In [6]:
# Wrap the index in a CitationQueryEngine. The tuning values are gathered in one
# dict so they are easy to spot and adjust.
# NOTE(review): presumably similarity_top_k is the number of retrieved chunks and
# citation_chunk_size the size of each citable chunk — confirm against the llama_index docs.
citation_engine_options = {
    "similarity_top_k": 3,
    "citation_chunk_size": 512,
}
query_engine = CitationQueryEngine.from_args(index, **citation_engine_options)

### Ask a question on the topic

In [7]:
# Keep the question in its own variable so it can be edited without touching the call.
question = "What are some important conjugate acid-base pair related to foods?"
response = query_engine.query(question)

# Bare last expression: the notebook displays the full response object.
response

Response(response='One important conjugate acid-base pair related to foods is hypochlorous acid and hypochlorite (HClO/ClO) [1].', source_nodes=[NodeWithScore(node=TextNode(id_='eb5fb9bc-c063-43a5-b626-9b785a19d102', embedding=None, metadata={'page_label': '1', 'file_name': '14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_path': 'data/14/14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_type': 'application/pdf', 'file_size': 445466, 'creation_date': '2023-11-25', 'last_modified_date': '2023-11-09', 'last_accessed_date': '2023-11-25'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4f61b01f-b288

In [9]:
# Inspect the source nodes attached to the response, i.e. the material the answer was drawn from.
answer_sources = response.source_nodes
answer_sources

[NodeWithScore(node=TextNode(id_='eb5fb9bc-c063-43a5-b626-9b785a19d102', embedding=None, metadata={'page_label': '1', 'file_name': '14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_path': 'data/14/14.7.1! Foods- From Cleaning and Disinfection to Microbial Nutrition and Protein Modification.pdf', 'file_type': 'application/pdf', 'file_size': 445466, 'creation_date': '2023-11-25', 'last_modified_date': '2023-11-09', 'last_accessed_date': '2023-11-25'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4f61b01f-b288-4212-83f2-e47a0c1ced91', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': '14.7.1! Foods- From Cleaning and Disi