In [None]:
# Retrieval-augmented generation (RAG) demo notebook.

import os
from dotenv import load_dotenv
# Load environment variables (e.g. API keys) from a .env file, if one is found.
load_dotenv()

In [None]:
# Make sure the OpenAI key is available to the libraries used below.
# os.getenv returns None when the variable is missing, and assigning None to
# os.environ raises an opaque "TypeError: str expected, not NoneType", so fail
# fast with an actionable message instead.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise EnvironmentError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

In [23]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader
# Load the contents of the local "data" directory as the corpus to index.
documents = SimpleDirectoryReader(input_dir="data").load_data()

In [24]:
# Build the vector index from the loaded documents; show_progress=True
# displays progress bars while the index is being built.
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
)

Parsing nodes: 100%|██████████| 21/21 [00:00<00:00, 1264.41it/s]
Generating embeddings: 100%|██████████| 21/21 [00:02<00:00,  9.15it/s]


In [42]:
# Query the index through its default query engine and print the answer.
query_engine = index.as_query_engine()
response = query_engine.query("where to report BOI?")
print(response)

Reporting companies can report Beneficial Ownership Information (BOI) electronically by accessing the BOI E-Filing portal at http://boiefiling.fincen.gov. The E-Filing portal allows companies to choose between uploading a finalized PDF version of the BOIR and submitting it online, or filling out the web-based version of the BOIR and submitting it online. Both methods require online submission as BOIRs cannot be mailed or faxed to FinCEN.


In [29]:
from llama_index.core.response.pprint_utils import pprint_response
# Pretty-print the response together with its source nodes (show_source=True),
# then print the plain response text.
# NOTE(review): `response` is whatever the most recently executed query cell
# assigned — run the cells in order so this shows the intended answer.
pprint_response(response, show_source=True)
print(response)

Final Response: Reporting companies can report Beneficial Ownership
Information (BOI) electronically by accessing the BOI E-Filing portal
at http://boiefiling.fincen.gov. They can choose to submit a finalized
PDF version of BOI or fill out the web-based version of BOI on the
portal. Both methods require online submission as BOIs cannot be
mailed or faxed to FinCEN.
______________________________________________________________________
Source Node 1/2
Node ID: 712143a1-38ed-4ffd-bce8-ae610f6c1f7d
Similarity: 0.8393212855648444
Text: Beneficial Ownership Information Reporting Filing Instructions
January 2024 - Version 1.0 5 II. Where to Report Beneficial Ownership
Information  Reporting       companies       may     complete
BOIRs   electronically  by      accessing       the     BOI
E-Filing        portal  at       http    s://boiefiling.fincen.gov
(accessible     beginning       on      January 1,      2024). The
E-Filing        portal  permits a       r...
____________________________

In [None]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
# Documented import path; `llama_index.core.indices.postprocessor` is only a
# backward-compatibility re-export of this module.
from llama_index.core.postprocessor import SimilarityPostprocessor

# Retrieve the 4 most similar nodes for each query ...
retriever = VectorIndexRetriever(index=index, similarity_top_k=4)
# ... and filter the retrieved nodes with a similarity cutoff of 0.80.
postprocessor = SimilarityPostprocessor(similarity_cutoff=0.80)

# Query engine that runs the retriever and then applies the postprocessor.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[postprocessor],
)
response = query_engine.query("How to file BOI?")

In [45]:
from llama_index.core.response.pprint_utils import pprint_response
# Show the formatted answer with its source nodes, followed by the plain text.
pprint_response(response, show_source=True)
print(response)

Final Response: To file a Beneficial Ownership Information Report
(BOIR), reporting companies can choose to complete the filing
electronically through the BOI E-Filing portal. They have two options
for submitting a BOIR: uploading a finalized PDF version of the BOIR
and submitting it online, or filling out the web-based version of the
BOIR and submitting it online. Both methods require the filing to be
done online, as BOIRs cannot be mailed or faxed to FinCEN. After
submission, the person filing the BOIR will receive confirmation from
FinCEN once the report is accepted. Additionally, FinCEN offers
system-to-system BOIR transmission via a secure Application
Programming Interface (API) for those interested in automating the
filing process.
______________________________________________________________________
Source Node 1/4
Node ID: 81635825-fd50-410e-8d84-edfea190ddbf
Similarity: 0.8596716477928947
Text: Beneficial Ownership Information Reporting Filing Instructions
January 2024 - Vers

In [39]:
import os.path
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# Reuse a previously persisted index when the storage path exists; otherwise
# build the index from the "data" directory and persist it for later runs.
PERSIST_DIR = "./storage"
if os.path.exists(PERSIST_DIR):
    # storage path present: restore the index from the persisted files
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # nothing persisted yet: load the documents and build the index ...
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    # ... then write it to disk
    index.storage_context.persist(persist_dir=PERSIST_DIR)

# in both cases the index can now be queried
query_engine = index.as_query_engine()
response = query_engine.query("What are prohibitad words and phrases are while reporting BOI?")
print(response)

The prohibited words and phrases while reporting BOI are:
- AKA
- DBA
- NMN
- NONE
- NOT APPLICABLE
- OTHER
- SAME
- SAME AS ABOVE
- SEE ABOVE
- T/A
- UNKNOWN
- VARIOUS
- XX
