# Chat With Your Data

## Persist Data to Vector Stores

# Install libraries

In [None]:
pip install openai

In [None]:
pip install python-dotenv

In [None]:
pip install langchain

In [None]:
pip install langchain-openai

In [None]:
pip install pypdf

In [None]:
pip install faiss-cpu

In [None]:
pip install langchainhub

In [None]:
pip install langchain-community

## Load OpenAI API Key to use OpenAI's embedding model

In [1]:
import os

from dotenv import find_dotenv, load_dotenv

# Locate the local .env file, then load the variables it defines.
# Binding the result to `_` keeps the return value out of the cell output.
env_file = find_dotenv()
_ = load_dotenv(env_file)

In [2]:
# Read the OpenAI API key from the process environment.
# Fail fast with an actionable message when the key is missing or empty,
# rather than letting an empty key reach the first API call.
# KeyError is kept so the exception type matches the plain os.environ[...] lookup.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    raise KeyError(
        "OPENAI_API_KEY is not set; define it in your .env file or shell environment"
    )

## Load documents

In [3]:
# PyPDFLoader is imported from langchain_community (the same package the FAISS
# import uses); the legacy `langchain.document_loaders` path is deprecated.
from langchain_community.document_loaders import PyPDFLoader

# Read the PDF statement; `pages` holds the Document objects returned by the loader
# (one per PDF page, per the PyPDFLoader documentation).
loader = PyPDFLoader('Visa Statement-4503 2024-11-18.pdf')
pages = loader.load()

## Chunk documents

In [4]:
# OpenAIEmbeddings is imported here but first used in the embedding cell further down.
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Split the loaded pages into chunks: target size 1000 characters,
# no overlap between consecutive chunks.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
documents = text_splitter.split_documents(pages)

In [11]:
# Dump the text of every chunk to a plain-text file for inspection.
# Chunks are written back-to-back, with no separator between them.
# - The encoding is pinned to UTF-8: the statement text contains non-ASCII
#   characters (e.g. "®"), and the platform default encoding may not handle them.
# - Mode 'w' replaces 'wt+': the file is only written here, never read back.
with open('RBC-11182024.txt', 'w', encoding='utf-8') as f:
    f.writelines(doc.page_content for doc in documents)

In [16]:
# Number of chunks produced by the text splitter.
len(documents)

5

In [20]:
# Inspect the text of the fourth chunk (index 3).
# NOTE(review): the saved output of this cell shows cardholder names and partial
# card numbers — clear the output before sharing or committing the notebook.
print(documents[3].page_content)

Signature® RBC Rewards® VisaJ
FARAMARZ AMIRSHAHI 4510 15** **** 4503
HAPPIE CLARA TESTA 4510 15** **** 0190
STATEMENT FROM OCT 17 TO NOV 18, 2024 4 OF 4
to illustrate how making only the Minimum Payment will increase the time
it takes to pay your balance and is not a recommended long term
repayment plan.
INTEREST RATE CHART
Description Rate (%) Remaining Balance** Expiry Date
Purchases & Fees 20.99 $2,344.89
** The "Determination of Interest" section on the back of your statement explains how interest is
charged and how you may avoid interest charges on purchases and fees and the "Applying your
payments" section explains how payments are applied to the Remaining Balances shown above.


# Generate embeddings and store in vector database
## FAISS vector database

In [12]:
from langchain_community.vectorstores import FAISS

# Embedding model used to vectorize each chunk.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=OPENAI_API_KEY,
)

# Embed every chunk and load the results into a FAISS vector store.
vectordb = FAISS.from_documents(documents, embeddings)

In [13]:
# Show how many entries the FAISS index holds; the recorded output (5) matches len(documents).
print(vectordb.index.ntotal)

5


## Persist Data in your Vector Store

In [14]:
# Persist the vector store under the local "faiss2c_index" path so it can be reloaded later.
vectordb.save_local("faiss2c_index")

## Load Vector Store

In [15]:
# Reload the persisted store from disk, reusing the same embedding model.
# NOTE: allow_dangerous_deserialization=True opts in to pickle-based loading;
# only use it for index files you created yourself, never for untrusted ones.
new_db = FAISS.load_local(
    "faiss2c_index",
    embeddings,
    allow_dangerous_deserialization=True,
)