In [None]:
%pip install -q langchain
%pip install -q sentence-transformers
%pip install -q chromadb
%pip install -q pypdf

In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain.vectorstores import Chroma

from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader

In [2]:
import os
from getpass import getpass

hf_token = "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"

In [3]:
os.environ["HUGGINGFACEHUB_API_TOKEN"]  = hf_token

In [7]:
data = PyPDFLoader("../Test pdfs/awsgsg-intro.pdf")

content = data.load()

In [8]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=256,chunk_overlap=20)

In [9]:
chunking = text_splitter.split_documents(content)

In [10]:
len(chunking)

228

In [11]:
chunking[3]

Document(page_content='Amaz on.com g raphics , logos , page headers , button icons , scripts, and ser vice names are tr ademar ks, or tr ade dress of Amaz on in', metadata={'source': '../Test pdfs/awsgsg-intro.pdf', 'page': 1})

#Embedding Model

In [12]:
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key = hf_token , model_name = 'BAAI/bge-base-en-v1.5'
)

In [13]:
vectorstore = Chroma.from_documents(chunking,embeddings)



In [20]:
query = "Tell about the pdf given to you?"

prompt = f"""
 <|system|>
You are an AI assistant that follows instruction extremely well.
Please be truthful and give direct answers
</s>
 <|user|>
 {query}
 </s>
 <|assistant|>
"""

In [21]:
retriever = vectorstore.as_retriever(
    search_type="mmr", #similarity
    search_kwargs={'k': 20}
)

retriever.get_relevant_documents(query)

[Document(page_content='Table of Contents\nGetting Star ted with A WS...............................................................................................................1', metadata={'page': 2, 'source': '../Test pdfs/awsgsg-intro.pdf'}),
 Document(page_content='You can also clic k Current Statement  on the same page and then scroll do wn to y our ear liest statement\nto see when y ou created y our account and ho w much time y ou ha ve left on the free tier .\nNote', metadata={'page': 14, 'source': '../Test pdfs/awsgsg-intro.pdf'}),
 Document(page_content='small amounts of data and tr affic. For details about limits under the free tier f or an individual ser vice, go\nto AWS F ree Usage Tier, and then clic k the ser vice that y ou’re interested in.', metadata={'page': 14, 'source': '../Test pdfs/awsgsg-intro.pdf'}),
 Document(page_content='4.Click Make Pub lic.\n5.Click OK to confir m making the file pub lic.\nAnyone on the Inter net can no w vie w and/or do wnload the file 

In [22]:
model = HuggingFaceHub(
    repo_id = "huggingfaceh4/zephyr-7b-alpha",
    model_kwargs={"temperature": 0.2, "max_length":1024,"max_new_tokens":4096}
)

In [23]:
qa = RetrievalQA.from_chain_type(llm=model,retriever = retriever, chain_type = "stuff")

In [24]:
response = qa(query)

In [25]:
print(response['result'])

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Table of Contents
Getting Star ted with A WS...............................................................................................................1

You can also clic k Current Statement  on the same page and then scroll do wn to y our ear liest statement
to see when y ou created y our account and ho w much time y ou ha ve left on the free tier .
Note

small amounts of data and tr affic. For details about limits under the free tier f or an individual ser vice, go
to AWS F ree Usage Tier, and then clic k the ser vice that y ou’re interested in.

4.Click Make Pub lic.
5.Click OK to confir m making the file pub lic.
Anyone on the Inter net can no w vie w and/or do wnload the file .This tr affic counts against y our free usage

Make a File Priv ate
If you've made a file pub lic, you can retur n it to its pr ivate condit