In [1]:
from langchain_community.document_loaders import PyPDFLoader




In [2]:
file_path = "./resource/PrivacyPolicyTemplate.pdf"
loader = PyPDFLoader(file_path)

docs = loader.load()

print(len(docs))


36


In [3]:
print(docs[0].page_content[0:100])
print(docs[0].metadata)


H o w
t o
e d i t
t h i s
t e m p l a t e
f o r
y o u r
b u s i n e s s :
1 .
C l i c k
F i l e
→
2 
{'source': './resource/PrivacyPolicyTemplate.pdf', 'page': 0}


In [4]:
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules["pysqlite3"]
import getpass
import os

os.environ["GROQ_API_KEY"] = "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"

from langchain_groq import ChatGroq


llm = ChatGroq(model="llama3-8b-8192")

In [5]:


from langchain.embeddings import HuggingFaceEmbeddings
import faiss

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

from uuid import uuid4



# text_splitter=CharacterTextSplitter(separator="\n",
#                                         chunk_size=1000,
#                                         chunk_overlap=200,
#                                         length_function=len)   
# chunks=text_splitter.split_text(raw_text)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                     model_kwargs={'device':'cpu'})
# Create vectors

index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))


vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# vectorstore = faiss.FAISS.from_documents(docs, embeddings)

uuids = [str(uuid4()) for _ in range(len(docs))]

vector_store.add_documents(documents=docs, ids=uuids)


retriever = vector_store.as_retriever()




  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
  from tqdm.autonotebook import tqdm, trange


In [10]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)


question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

results = rag_chain.invoke({"input": "What was your right?"})


results

{'input': 'What was your right?',
 'context': [Document(metadata={'source': './resource/PrivacyPolicyTemplate.pdf', 'page': 21}, page_content='H o w e v e r ,\np l e a s e\nn o t e\nt h a t\nt h i s\nw i l l\nn o t\na f f e c t\nt h e\nl a w f u l n e s s\no f\nt h e\np r o c e s s i n g\nb e f o r e\ni t s\nw i t h d r a w a l\nn o r ,\nw h e n\na p p l i c a b l e\nl a w\na l l o w s ,\nw i l l\ni t\na f f e c t\nt h e\np r o c e s s i n g\no f\ny o u r\np e r s o n a l\ni n f o r m a t i o n\nc o n d u c t e d\ni n\nr e l i a n c e\no n\nl a w f u l\np r o c e s s i n g\ng r o u n d s\no t h e r\nt h a n\nc o n s e n t .\nO p t i n g\no u t\no f\nm a r k e t i n g\na n d\np r o m o t i o n a l\nc o m m u n i c a t i o n s :\nY o u\nc a n\nu n s u b s c r i b e\nf r o m\no u r\nm a r k e t i n g\na n d\np r o m o t i o n a l\nc o m m u n i c a t i o n s\na t\na n y\nt i m e\nb y\nc l i c k i n g\no n\nt h e\nu n s u b s c r i b e\nl i n k\ni n\nt h e\ne m a i l s\nt h a t\nw e\ns e n

In [11]:
print(results["context"][0].page_content)




H o w e v e r ,
p l e a s e
n o t e
t h a t
t h i s
w i l l
n o t
a f f e c t
t h e
l a w f u l n e s s
o f
t h e
p r o c e s s i n g
b e f o r e
i t s
w i t h d r a w a l
n o r ,
w h e n
a p p l i c a b l e
l a w
a l l o w s ,
w i l l
i t
a f f e c t
t h e
p r o c e s s i n g
o f
y o u r
p e r s o n a l
i n f o r m a t i o n
c o n d u c t e d
i n
r e l i a n c e
o n
l a w f u l
p r o c e s s i n g
g r o u n d s
o t h e r
t h a n
c o n s e n t .
O p t i n g
o u t
o f
m a r k e t i n g
a n d
p r o m o t i o n a l
c o m m u n i c a t i o n s :
Y o u
c a n
u n s u b s c r i b e
f r o m
o u r
m a r k e t i n g
a n d
p r o m o t i o n a l
c o m m u n i c a t i o n s
a t
a n y
t i m e
b y
c l i c k i n g
o n
t h e
u n s u b s c r i b e
l i n k
i n
t h e
e m a i l s
t h a t
w e
s e n d ,
r e p l y i n g
“ S T O P ”
o r
“ U N S U B S C R I B E ”
t o
t h e
S M S
m e s s a g e s
t h a t
w e
s e n d ,
[ O t h e r ]
o r
b y
c o n t a c t i n g
u s
u s i n g
t h e
d e t a i l s
p r o v i d e d
i n


In [9]:
print(results["context"][0].page_content)




■
A d
N e t w o r k s
■
A ﬃ l i a t e
M a r k e t i n g
P r o g r a m s
■
D a t a
A n a l y t i c s
S e r v i c e s
■
R e t a r g e t i n g
P l a t f o r m s
■
S o c i a l
N e t w o r k s
■
U s e r
A c c o u n t
R e g i s t r a t i o n
&
A u t h e n t i c a t i o n
S e r v i c e s
■
[ O t h e r ]
T h e
c a t e g o r i e s
o f
t h i r d
p a r t i e s
t o
w h o m
w e
s h a r e d
p e r s o n a l
i n f o r m a t i o n
w i t h
a r e :
■
A d
N e t w o r k s
■
A ﬃ l i a t e
M a r k e t i n g
P r o g r a m s
■
D a t a
A n a l y t i c s
S e r v i c e s
■
R e t a r g e t i n g
P l a t f o r m s
■
S o c i a l
N e t w o r k s
■
U s e r
A c c o u n t
R e g i s t r a t i o n
&
A u t h e n t i c a t i o n
S e r v i c e s
■
[ O t h e r ]
[
N O T I C E
:
T h i s
w e b s i t e
m a y
s e l l
y o u r
s e n s i t i v e
p e r s o n a l
i n f o r m a t i o n . ]
[
N O T I C E
:
T h i s
w e b s i t e
m a y
s e l l
y o u r
b i o m e t r i c
p e r s o n a l
i n f o r m a t i o n . ] ]
Y o u r
R i g h t s
Y o u
