In [1]:
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings 
from langchain_community.vectorstores import FAISS

In [2]:
# Point a directory loader at the relative "amazon-docs" folder and read
# everything it yields into a list of documents.
loader = DirectoryLoader("amazon-docs")
documents = loader.load()

# Report how many documents came back from the loader.
print(f"{len(documents)} Pages Loaded")

74 Pages Loaded


In [9]:
# Chunking policy used for every page:
#   - separators: candidate split points, listed from coarsest (blank line)
#     to finest (empty string)
#   - chunk_size / chunk_overlap: target chunk length and the amount shared
#     between neighbouring chunks (presumably measured in characters, the
#     splitter's default length function -- confirm against LangChain docs)
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_size=500,
    chunk_overlap=50,
)

In [10]:
# Chunk every loaded page with the splitter configured earlier.
split_documents = text_splitter.split_documents(documents)
print(f"Split into {len(split_documents)} Documents...")

# Sanity check: show the metadata attached to the first chunk.
print(split_documents[0].metadata)

# Embed each chunk with OpenAI embeddings and build a FAISS index from them.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(documents=split_documents, embedding=embeddings)

# Write the index to the "faiss_amazon_index" folder so it can be loaded
# from disk later.
db.save_local(folder_path="faiss_amazon_index")

Split into 1540 Documents...
{'source': 'amazon-docs\\_.html'}


In [11]:
# Create an embeddings client for the reloaded index to use.
embeddings = OpenAIEmbeddings()

# Show the first chunk's metadata again (same value printed when splitting).
print(split_documents[0].metadata)

# Read the index back from the "faiss_amazon_index" folder.
# NOTE(review): allow_dangerous_deserialization=True is an explicit opt-in to
# unsafe deserialization of the saved files; only point this at an index
# folder produced by this notebook, never at one from an untrusted source.
new_db = FAISS.load_local(
    folder_path="faiss_amazon_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)


{'source': 'amazon-docs\\_.html'}


In [13]:

# Natural-language question used to try out retrieval against the reloaded index.
test_query = "What are the benefits Amazon employees can start using on the first day?"

# Ask the vector store for the chunks it ranks as most similar to the question
# (result count is left at the library default).
docs = new_db.similarity_search(query=test_query)

In [14]:
# Print the source path and text of every document returned by the query above
for doc in docs:
    # Each hit is rendered as a banner line followed by its value:
    # first the originating file, then the chunk text.
    print("##---- Page ---##", doc.metadata['source'], sep="\n")
    print("##---- Content ---##", doc.page_content, sep="\n")

##---- Page ---##
amazon-docs\_news_workplace_amazon-health-benefits.html
##---- Content ---##
In addition to a slew of perks that include an employee discount, education benefits, and paid family leave (just to name a few), Amazon has some great health benefits, too. Unlike many employer health benefits, you can start using the ones at Amazon right from your very first day on the job, with no waiting period.

14 new benefits Amazon is offering employees—from free estate planning to free mental health care
##---- Page ---##
amazon-docs\_workplace_employee-benefits.html
##---- Content ---##
Employee Benefits

Along with average hourly pay of over $20.50, Amazon offers a range of great benefits that support employees and eligible family members, including domestic partners and their children. These comprehensive benefits begin on day one and include health care coverage, paid parental leave, ways to save for the future, paid college tuition, and other resources to improve health and well