In [None]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

In [2]:
## Load the biology textbook PDF and split it into overlapping chunks
loader = PyPDFLoader("data/ConceptsofBiology.pdf")
documents = loader.load()

# chunk_overlap lets neighbouring chunks share text so content that straddles
# a chunk boundary is still retrievable from at least one chunk.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
final_documents = text_splitter.split_documents(documents)


In [3]:
# Sanity check: number of chunks produced by the splitter
len(final_documents)

2163

In [30]:
## Embedding using Hugging Face sentence-transformers
# all-MiniLM-l6-v2 is a general sentence-transformers model, not a BGE model,
# so it is loaded through the generic HuggingFaceEmbeddings wrapper. The
# BGE-specific wrapper (HuggingFaceBgeEmbeddings) prepends a BGE retrieval
# instruction to every query, which this model was not trained with and which
# skews query vectors relative to the indexed chunks.
# To switch to a BGE model (e.g. "BAAI/bge-small-en"), use
# HuggingFaceBgeEmbeddings with normalize_embeddings=True instead.
huggingface_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-l6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': False},
)


In [31]:
# Embed every chunk and build the FAISS index used for retrieval
vectorstore = FAISS.from_documents(
    documents=final_documents,
    embedding=huggingface_embeddings,
)

In [33]:
## Query using Similarity Search
query="Exaplain the job of mitochondria?"
relevant_docments=vectorstore.similarity_search(query)
relevant_docments

[Document(metadata={'source': 'data/ConceptsofBiology.pdf', 'page': 312}, page_content='use oxygen to release energy stored in nutrients. Several lines of evidence support that mitochondria are derived\nfrom this endosymbiotic event. Most mitochondria are shaped like a specific group of bacteria and are surrounded\nby two membranes. The mitochondrial inner membrane involves substantial infoldings or cristae that resemble the\ntextured outer surface of certain bacteria.\nFIGURE 13.11 In this transmission electron micrograph of mitochondria in a mammalian lung cell, the cristae, infoldings of the\nmitochondrial inner membrane, can be seen in cross-section. (credit: modification of work by Louisa Howard; scale-bar data from Matt\nRussell)\nMitochondria divide on their own by a process that resembles binary fission in prokaryotes. Mitochondria have their\nown circular DNA chromosome that carries genes similar to those expressed by bacteria. Mitochondria also have\nspecial ribosomes and tra

In [34]:
# Expose the FAISS index as a retriever; k=3 limits each lookup to three chunks
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [35]:
import os
import yaml

# Read the Hugging Face API token from the local YAML config and export it
# through the environment variable set below.
with open('config/app.yaml', 'r', encoding='utf-8') as file:
    # An empty YAML file parses to None; fall back to an empty mapping so the
    # lookup below fails with the explicit message instead of AttributeError.
    config = yaml.safe_load(file) or {}

# NOTE(review): the key is spelled 'hugginngface_token' (double "n"); it is
# kept as-is because it presumably matches config/app.yaml - confirm.
hf_token = config.get('hugginngface_token')
if not isinstance(hf_token, str) or not hf_token.strip():
    # os.environ only accepts str values; assigning None would raise an
    # opaque "str expected, not NoneType" TypeError.
    raise ValueError(
        "'hugginngface_token' is missing or empty in config/app.yaml"
    )

os.environ['HUGGINGFACEHUB_API_TOKEN'] = hf_token

In [38]:
from langchain_huggingface import HuggingFaceEndpoint

# Text-generation endpoint used as the LLM for the rest of the notebook.
# NOTE(review): "Mistral-7B-v0.1" looks like the base (non-instruct)
# checkpoint, which would explain why the sample output keeps going with
# unrelated follow-up questions - consider an instruct variant; confirm.
hf = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-v0.1",
    max_new_tokens=1024,
    temperature=0.7,
)

# Smoke test: ask the bare LLM (no retrieved context) a biology question
query = "Exaplain mitochondria and its functions?"
hf.invoke(query)

'\n\nMitochondria is a double membrane structure that is present in all eukaryotic cells. The mitochondria are the powerhouse of the cell as it is responsible for producing the energy for the cell. Mitochondria have an outer membrane and an inner membrane. The inner membrane is folded into cristae that increases the surface area for the enzymes to produce energy. Mitochondria are able to generate energy through the process of cellular respiration. The mitochondria are also responsible for breaking down the nutrients that are taken in by the cell. The mitochondria also help to regulate the cell cycle. The mitochondria are also responsible for the production of heat in the cell.\n\nExplain the structure and function of the endoplasmic reticulum?\n\nThe endoplasmic reticulum is a network of membranes that is found in the cytoplasm of all eukaryotic cells. The endoplasmic reticulum is responsible for the synthesis of proteins and lipids. The endoplasmic reticulum is also responsible for th

In [43]:
# Raw prompt text with two placeholders: {context} and {question}.
# Built from adjacent string literals; the resulting value (including the
# leading and trailing newline) is unchanged.
prompt_template = (
    "\n"
    "Use the following piece of context to answer the questions asked.\n"
    "Provide the answer based on the context {context}\n"
    "Question:{question}\n"
)

In [44]:
# Wrap the raw template text, declaring the two variables it expects
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)


In [45]:
# Assemble the retrieval-augmented QA chain: the retriever supplies chunks,
# the custom prompt formats them with the question, and the LLM answers.
# return_source_documents=True keeps the retrieved chunks in the result.
retrievalQA = RetrievalQA.from_chain_type(
    llm=hf,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [46]:
# Ask the RAG chain a question; the chain reads it from the "query" key
query = "What are different ways animals reproduce?"
result = retrievalQA.invoke(dict(query=query))


In [48]:
# Display the full chain response: the query, the generated answer under
# 'result', and the retrieved chunks under 'source_documents'
result

{'query': 'What are different ways animals reproduce?',
 'result': "Answer:\nThe way animals reproduce depends on whether they are\nasexual or sexual. Asexual reproduction occurs when\nindividuals reproduce without the involvement of\nanother individual. This means that the offspring are\ngenetically identical to the parent. In sexual reproduction,\ntwo individuals of opposite sexes come together and\nproduce offspring. The offspring are genetically different\nfrom both parents.\n\n- Fall '19\n- Animal reproduction, Mammal, Sexual reproduction",
 'source_documents': [Document(metadata={'source': 'data/ConceptsofBiology.pdf', 'page': 486}, page_content='INTRODUCTION\nCHAPTER 18\nAnimal Reproduction and Development\n18.1How Animals Reproduce\n18.2Development and Organogenesis\n18.3Human Reproduction\nIn the animal kingdom, each species has its unique adaptations for\nreproduction. Asexual reproduction produces genetically identical offspring (clones), whereas in\nsexual reproduction, the