In [140]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

In [141]:

## Read the ppdfs from the folder
loader=PyPDFDirectoryLoader("./knowledge_base")

documents=loader.load()

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)

final_documents=text_splitter.split_documents(documents)
final_documents[0]

Document(metadata={'source': 'knowledge_base/The Metamorphosis.pdf', 'page': 0}, page_content='J. Basic. Appl. Sci. Res. , 2(2)1600 -1607 , 2012  \n© 2012, TextRoad Publication  ISSN 2090 -4304  \nJournal of Basic and Applied  \nScientific Research  \nwww.textroad.com  \n \n*Corresponding Author : Hamedreza Kohzadi, Departme nt of English Literature, Arak Branch, Islamic Azad  University, Arak, Iran .  \n                                           E-mail: hamedreza_kohzadi_usa@yahoo.com . \n A Study of Franz Kafka’s The Metamorphosis  \n \nHamedreza Kohzadi 1, Fatemeh Azizmohammadi 2, Mahboubeh Nouri 3   \n \n2,1Department of English Literature, Arak Branch, Islamic Azad University, Arak, Iran  \n3Department of Arts and Humanities, Scie nce and Research Branch, Islamic Azad University, \nArak, Iran  \n \n \nABSTRACT  \nThe story of The Metamorphosis  is easily told. It is the story of a travelling salesman by the name \nGregor Samsa who wakes up one morning transformed into a hideous an

In [142]:
len(final_documents)

54

In [143]:
## Embedding Using Huggingface
huggingface_embeddings=HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",      #sentence-transformers/all-MiniLM-l6-v2
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True}

)

In [144]:
import  numpy as np
print(np.array(huggingface_embeddings.embed_query(final_documents[0].page_content)))
print(np.array(huggingface_embeddings.embed_query(final_documents[0].page_content)).shape)

[ 4.07762788e-02  1.11004245e-02 -2.38742121e-03  7.89812803e-02
 -4.73974012e-02 -3.03049404e-02  1.68100260e-02  5.42226136e-02
 -4.48965095e-02 -9.93691571e-03 -1.23756751e-02  1.11104846e-02
 -9.44996811e-03 -9.64715332e-03  4.48650122e-02  6.06569112e-04
  2.58104056e-02  2.57435143e-02 -1.60797387e-02  1.87225931e-03
  7.79212192e-02 -2.86992341e-02  2.29053088e-02 -2.06236150e-02
  2.47524753e-02  1.15705766e-02  5.12356050e-02  3.22968001e-03
 -3.72636579e-02 -1.52279511e-01 -3.73289920e-02  5.58387372e-04
  6.92994371e-02  1.56859905e-02 -6.24925504e-03  3.87277715e-02
 -6.83134273e-02  1.55024044e-02  1.62292551e-02 -8.38078279e-03
 -3.13001163e-02  3.17705385e-02  2.18730774e-02  1.23833474e-02
  1.38000110e-02 -1.07769938e-02 -2.88995244e-02  8.53419304e-03
 -3.21921743e-02 -3.38666551e-02 -1.27355933e-01 -1.04133040e-02
 -1.19396590e-03  3.12590413e-02  3.25550884e-02 -1.67254601e-02
  6.06036149e-02  3.57260508e-03  9.65489075e-02  5.12825996e-02
  3.82161210e-03  1.49693

In [145]:
vectorstore=FAISS.from_documents(final_documents[:120],huggingface_embeddings)

In [146]:
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x309d6b6a0>

In [147]:
## Query using Similarity Search
query="what is the name of the salesman?"
question="what is the name of the salesman?"
relevant_docments=vectorstore.similarity_search(query)
print(relevant_docments[0].page_content)

J. Basic. Appl. Sci. Res. , 2(2)1600 -1607 , 2012  
© 2012, TextRoad Publication  ISSN 2090 -4304  
Journal of Basic and Applied  
Scientific Research  
www.textroad.com  
 
*Corresponding Author : Hamedreza Kohzadi, Departme nt of English Literature, Arak Branch, Islamic Azad  University, Arak, Iran .  
                                           E-mail: hamedreza_kohzadi_usa@yahoo.com . 
 A Study of Franz Kafka’s The Metamorphosis  
 
Hamedreza Kohzadi 1, Fatemeh Azizmohammadi 2, Mahboubeh Nouri 3   
 
2,1Department of English Literature, Arak Branch, Islamic Azad University, Arak, Iran  
3Department of Arts and Humanities, Scie nce and Research Branch, Islamic Azad University, 
Arak, Iran  
 
 
ABSTRACT  
The story of The Metamorphosis  is easily told. It is the story of a travelling salesman by the name 
Gregor Samsa who wakes up one morning transformed into a hideous and monstrous vermi n; he


In [148]:
retriever=vectorstore.as_retriever(search_type="similarity",search_kwargs={"k":3})
print(retriever)

tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x309d6b6a0> search_kwargs={'k': 3}


In [149]:
import os
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Retrieve from environment variable

In [150]:
from langchain_huggingface import HuggingFaceEndpoint
repo_id="mistralai/Mistral-7B-Instruct-v0.2"
llm=HuggingFaceEndpoint(repo_id=repo_id,max_length=128,temperature=0.7,token=HUGGINGFACEHUB_API_TOKEN)
llm.invoke(query)

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    token was transferred to model_kwargs.
                    Please make sure that token is what you intended.


'\n\nThe name of the salesman is not mentioned in the text. The text only describes him as an old man with a white beard and wearing a rusty brown derby hat.'

In [151]:
llm.invoke(query)

'\n\nThe name of the salesman is not mentioned in the text. The text only describes him as an old man with a white beard and wearing a rusty brown derby hat.'

In [153]:
prompt_template="""
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context.

{context}
Question:{question}

Helpful Answers:
"""

In [154]:
prompt=PromptTemplate(template=prompt_template,input_variables=["context","question"])
print(prompt)


input_variables=['context', 'question'] input_types={} partial_variables={} template='\nUse the following piece of context to answer the question asked.\nPlease try to provide the answer only based on the context.\n\n{context}\nQuestion:{question}\n\nHelpful Answers:\n'


In [155]:
retrievalQA=RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt":prompt}
)

In [156]:
result = retrievalQA.invoke({"query": query})
print(result['result'])

Answer:
Gregor Samsa

Reference(s):
J. Basic. Appl. Sci. Res., 2(2), 1600 -1607, 2012.
