In [1]:
import os
import nest_asyncio
# Patch asyncio so that an event loop can be re-entered while another one is
# already running (the notebook kernel runs its own loop).
# NOTE(review): presumably required by the async calls made internally by
# LlamaParse / the node parser further down — confirm.
nest_asyncio.apply()

from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.core import Settings
from llama_index.core import StorageContext
from llama_index.core.node_parser import MarkdownElementNodeParser

from llama_parse import LlamaParse
from llama_index.vector_stores.astra import AstraDBVectorStore
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker

In [2]:
# Credentials are read from the environment so that real secrets never have to
# be typed into (and committed with) the notebook. A variable that is not set
# falls back to "", which is the placeholder value these constants used to be
# hard-coded to.
#
# Expected environment variables:
#   OPENAI_API_KEY               - OpenAI key (LLM + embeddings)
#   LLAMA_CLOUD_API_KEY          - LlamaParse / LlamaCloud key
#   ASTRA_DB_APPLICATION_TOKEN   - Astra DB application token
#   ASTRA_DB_API_ENDPOINT        - Astra DB API endpoint URL
#   ASTRA_DB_KEYSPACE            - Astra DB keyspace / namespace
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
LLAMA_CLOUD_API_KEY = os.environ.get("LLAMA_CLOUD_API_KEY", "")

ASTRA_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN", "")
ASTRA_API_ENDPOINT = os.environ.get("ASTRA_DB_API_ENDPOINT", "")
ASTRA_NAMESPACE = os.environ.get("ASTRA_DB_KEYSPACE", "")

In [3]:
# Models used throughout the notebook: a chat LLM for answering/summarising
# and an embedding model for vectorising nodes.
# No api_key argument is passed to either constructor, so the OPENAI_API_KEY
# constant is not consumed here.
llm = OpenAI(model="gpt-4o-mini")
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Register both as the global llama-index defaults.
Settings.embed_model = embed_model
Settings.llm = llm

In [8]:
# Parse the source PDF with LlamaParse, asking for markdown output.
# No api_key is passed, so the LLAMA_CLOUD_API_KEY constant is not used by
# this call.
# NOTE(review): presumably LlamaParse falls back to the LLAMA_CLOUD_API_KEY
# environment variable — confirm.
pdf_path = "./senate-legislative/DTI submission EMB Submission re CREATE MORE.pdf"
markdown_parser = LlamaParse(result_type="markdown")
documents = markdown_parser.load_data(pdf_path)

Started parsing the file under job_id e0e4dc42-66d8-4261-be87-5ee5efe71668


In [10]:
# Sanity check: show the first 1000 characters of the first parsed document.
first_document = documents[0]
print(first_document.text[:1000])

# 1. Number of entities registered with Export Marketing Bureau from 2018 to 2024

(Sec. 6 amending Sec. 293 (M));

- DTI-EMB accredited a total of 77 exporters based on its 2021 data. These are export enterprises that do not qualify for registrations under PEZA or BOI, but are considered export-oriented by virtue of their export revenues constituting more than 70% of their total gross revenues;
- DTI-EMB terminated its accreditation services on February 2022, upon issuance of BIR Revenue Regulation (RR) 21-2021 and Revenue Memorandum Circular (RMC) 24-2022 which implemented CREATE’s IRR provisions. The CREATE Law also does not list DTI-EMB as an Investment Promotion Agency (IPA) where local enterprises can register as an “export enterprise” (REE) in order to avail of fiscal and non-fiscal incentives under CREATE Law.
- Out of the 77 companies registered in 2021, 88% are considered MSMEs and 12% are Large enterprises.
- The total export sales attributable to these EMB-accredited compan

In [27]:
# Connection settings shared by both Astra DB collections.
# embedding_dimension=1536 is the vector size the collections are declared
# with; it is expected to match the output size of the embedding model in use.
astra_connection = dict(
    token=ASTRA_TOKEN,
    api_endpoint=ASTRA_API_ENDPOINT,
    namespace=ASTRA_NAMESPACE,
    embedding_dimension=1536,
)

# Collection backing the node/object ("advanced") index.
astra_db_store_advanced = AstraDBVectorStore(
    collection_name="astra_v_table_llamaparse_advanced_new",
    **astra_connection,
)

# Collection backing the plain document ("base") index.
astra_db_store_base = AstraDBVectorStore(
    collection_name="astra_v_table_llamaparse_base",
    **astra_connection,
)

In [11]:
# Markdown-aware node parser; it is handed the notebook's LLM and is allowed
# up to 8 workers.
node_parser = MarkdownElementNodeParser(
    num_workers=8,
    llm=llm,
)

In [12]:
# Run the markdown element parser over the parsed documents to obtain nodes.
nodes = node_parser.get_nodes_from_documents(documents)

0it [00:00, ?it/s]
0it [00:00, ?it/s]


In [13]:
# Dump every node: its position on one line, followed by its content.
for idx, n in enumerate(nodes):
    print(idx, n.get_content(), sep="\n")

0
1. Number of entities registered with Export Marketing Bureau from 2018 to 2024

(Sec. 6 amending Sec. 293 (M));

- DTI-EMB accredited a total of 77 exporters based on its 2021 data. These are export enterprises that do not qualify for registrations under PEZA or BOI, but are considered export-oriented by virtue of their export revenues constituting more than 70% of their total gross revenues;
- DTI-EMB terminated its accreditation services on February 2022, upon issuance of BIR Revenue Regulation (RR) 21-2021 and Revenue Memorandum Circular (RMC) 24-2022 which implemented CREATE’s IRR provisions. The CREATE Law also does not list DTI-EMB as an Investment Promotion Agency (IPA) where local enterprises can register as an “export enterprise” (REE) in order to avail of fiscal and non-fiscal incentives under CREATE Law.
- Out of the 77 companies registered in 2021, 88% are considered MSMEs and 12% are Large enterprises.
- The total export sales attributable to these EMB-accredited compan

In [14]:
# Split the parsed nodes into two collections: `base_nodes` and `objects`.
# Both are concatenated and indexed together when the recursive index is built.
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)

In [22]:
# Title of the source document, attached to every node that gets indexed so
# the document can be identified from any retrieved chunk, not just the first.
DOCUMENT_METADATA = {
    "document_title": "DTI submission EMB Submission re CREATE MORE"
}

# base_nodes and objects are both indexed, so both are tagged.
# update() merges the title into each node's existing metadata instead of
# replacing the dict, so keys set by the parser are kept. Iterating also makes
# this a no-op (rather than an IndexError) when no nodes were produced.
for indexed_node in base_nodes + objects:
    indexed_node.metadata.update(DOCUMENT_METADATA)

In [25]:
# Display the metadata of the first base node to confirm the title is present.
base_nodes[0].metadata

{'document_title': 'DTI submission EMB Submission re CREATE MORE'}

In [28]:
# "Advanced" pipeline: index the parsed base nodes together with the objects,
# persisted in the advanced Astra DB collection.
storage_context_advanced = StorageContext.from_defaults(
    vector_store=astra_db_store_advanced
)
recursive_index = VectorStoreIndex(
    nodes=[*base_nodes, *objects],
    storage_context=storage_context_advanced,
)

# "Base" pipeline: index the parsed documents directly, persisted in the base
# Astra DB collection. This index is built from `documents`, not from
# `base_nodes`, so metadata set on the nodes does not reach it.
storage_context_base = StorageContext.from_defaults(
    vector_store=astra_db_store_base
)
raw_index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context_base,
)

In [29]:
# Cross-encoder reranker shared by both engines: each engine retrieves 5
# candidates (similarity_top_k) and the reranker keeps the best 3 (top_n).
reranker = FlagEmbeddingReranker(model="BAAI/bge-reranker-large", top_n=3)

# Baseline engine over the raw-document index.
raw_query_engine = raw_index.as_query_engine(
    similarity_top_k=5,
    node_postprocessors=[reranker],
)

# Engine over the node/object index, with verbose output enabled.
recursive_query_engine = recursive_index.as_query_engine(
    similarity_top_k=5,
    node_postprocessors=[reranker],
    verbose=True,
)

In [30]:
query = "What is the timeline for incentives under EDA?"

# Ask the baseline engine and print a label line followed by the answer.
response_1 = raw_query_engine.query(query)
print("LlamaParse + Basic Query Engine", response_1, sep="\n")

# NOTE(review): the recursive-engine comparison below is currently disabled.
# response_2 = recursive_query_engine.query(query)
# print("LlamaParse + Recursive Retriever Query Engine")
# print(response_2)

LlamaParse + Basic Query Engine
The timeline for incentives under the Export Development Act (EDA) was from 1995 to 1999. Specific incentives had different expiration dates, such as duty-free importation of machinery and equipment until December 31, 1997, and tax credits for imported raw materials not locally available until December 31, 1999.


In [41]:
query = "What is CREATE MORE?"

# Ask the baseline engine and print a label line followed by the answer.
response_1 = raw_query_engine.query(query)
print("LlamaParse + Basic Query Engine", response_1, sep="\n")

LlamaParse + Basic Query Engine
The provided context does not contain any information about "CREATE MORE." Therefore, I cannot provide an answer regarding that topic.
