In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.chains import RetrievalQA
import os

In [31]:
# Load PDF
loader = PyPDFLoader("dataset/12th_chemistry.pdf")
documents = loader.load()

In [32]:
# Split text
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.split_documents(documents)

In [33]:
# Embeddings
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [34]:
# Vector Store
vectorstore = FAISS.from_documents(docs, embedding_model)
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":10})

In [14]:
vectorstore.index_to_docstore_id

{0: '66455c6d-cc3a-4168-96be-20ae3b3501b9',
 1: '86afe0ed-331c-4d8b-a1bb-c783da83810a',
 2: '89c85d68-0344-46f1-ba62-6c3a6bed89f6',
 3: '6746ad02-2974-4c4c-9805-a631c949f927',
 4: '68d34f50-2561-46f6-a3c1-2e3258c3de42',
 5: '70d7481e-8074-4cf0-b5d2-d1757c17448c',
 6: 'a67e33d0-9b24-4058-bc64-df85042e5dfb',
 7: '7819deab-b7fb-4fd3-9b42-275fc0c4cdc7',
 8: 'b0d1c6b4-afce-4667-99c0-f9e13cb4f333',
 9: '674e7dfc-27fe-41c4-b071-236c93afd999',
 10: '4a3ed03f-5d13-4859-9895-269b11083a43',
 11: 'dc5e202b-4412-4764-819c-b2194e53435d',
 12: 'a4040e20-fe1a-4127-910f-f1a56c88f669',
 13: '3ce16bc3-07ff-4538-8f32-319f70bce07b',
 14: '3a701093-dc5e-4ca0-bbcb-cce31bd8333a',
 15: 'af275d4c-ee7a-4fd3-b1c7-805150af5bd9',
 16: '71b2f69f-9de2-4bec-bbbb-72ffb763b603',
 17: 'fbca48c9-96e1-4c93-bdf5-9acdf9d0214f',
 18: '57aeb8a5-a642-4370-bfef-ea268522cb28',
 19: '33033cd6-3c53-482b-9085-129486a8dd68',
 20: '1c787386-5263-4f8b-bc80-9b1d44da195c',
 21: 'a963b91f-3c00-4386-a36c-f9a9d58e7459',
 22: '0113a5be-7429-

In [15]:
vectorstore.get_by_ids(["86afe0ed-331c-4d8b-a1bb-c783da83810a"])

[Document(id='86afe0ed-331c-4d8b-a1bb-c783da83810a', metadata={'producer': 'Pdftools SDK', 'creator': 'PyPDF', 'creationdate': '', 'moddate': '2025-05-20T04:44:51+00:00', 'source': 'dataset/12th_chemistry.pdf', 'total_pages': 284, 'page': 0, 'page_label': '1'}, page_content='properties exhibited by some\nsolutes in solutions.\nIn normal life we rarely come across pure substances.\nMost of these are mixtures containing two or more pure\nsubstances. Their utility or importance in life depends\non their composition. For example, the properties of\nbrass (mixture of copper and zinc) are quite different\nfrom those of Ger man silver (mixtur e of copper , zinc\nand nickel) or bronze (mixture of copper and tin);\n1 part per million (ppm) of fluoride ions in water')]

In [18]:
retriever.invoke("what is periodic table?")

[Document(id='b6eefc44-ef27-4082-ba96-4dbadf63f59c', metadata={'producer': 'Pdftools SDK', 'creator': 'PyPDF', 'creationdate': '', 'moddate': '2025-05-20T04:44:51+00:00', 'source': 'dataset/12th_chemistry.pdf', 'total_pages': 284, 'page': 88, 'page_label': '89'}, page_content='The d-block of the periodic table contains the elements\nof the groups 3-12 in which the d  orbitals are\nprogressively filled in each of the four long periods.\nThe f-block consists of elements in which 4 f and 5 f\norbitals are progressively filled . They are placed in a\nseparate panel at the bottom of the periodic table.  The\nnames transition metals  and inner transition  metals\nare often used to refer to the elements of d-and\nf-blocks respectively.'),
 Document(id='4b6cf0f2-a616-4f45-a1e0-0d01d60890d1', metadata={'producer': 'Pdftools SDK', 'creator': 'PyPDF', 'creationdate': '', 'moddate': '2025-05-20T04:44:51+00:00', 'source': 'dataset/12th_chemistry.pdf', 'total_pages': 284, 'page': 113, 'page_label': 

In [59]:
from langchain_groq import ChatGroq
from dotenv import load_dotenv
load_dotenv()
llm = ChatGroq(
    model_name="llama3-70b-8192",  # or llama3-70b-8192, etc.
    api_key=os.environ["grok_key"]
)

In [36]:
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    
    template="""
    You're a helpful chemistry tutor.
    Answer ONLY from the provided context,
    If the context is insufficient, just say out of syllabus.

    Context:
    {context}

    Question:
    {question}

    Answer:""",
    input_variables=["context", "question"],
)

In [61]:
question = "is periodic table a topic in this book. if yes elaborate it"
retriever_docs = retriever.invoke(question)

In [62]:
context_text = "\n\n".join(doc.page_content for doc in retriever_docs)

In [63]:
final_prompt = prompt.invoke({"context": context_text, "question":question})

In [43]:
final_prompt

StringPromptValue(text="\n    You're a helpful chemistry tutor.\n    Answer ONLY from the provided context,\n    If the context is insufficient, just say out of syllabus.\n\n    Context:\n    both these technologies are based on electrochemical principles.\nIntext QuestionsIntext QuestionsIntext QuestionsIntext QuestionsIntext Questions\n2.13 Write the chemistry of recharging the lead storage battery, highlighting\nall the materials that are involved during recharging.\n2.14 Suggest two materials other than hydrogen that can be used as fuels in\nfuel cells.\n2.15 Explain how rusting of iron is envisaged as setting up of an\nelectrochemical cell.\nSummary SummarySummary SummarySummary\n\nused on a large scale in various instruments and\ndevices. The reactions carried out electrochemically\ncan be energy efficient and less polluting. Therefore,\nstudy of electrochemistry is important for creating new\ntechnologies that are ecofriendly. The transmission of\nsensory signals through cells t

In [64]:
ans = llm.invoke(final_prompt)
print(ans)

content='Yes, the periodic table is a topic in this book. The book discusses the d-block and f-block elements in the periodic table, their positions, electronic configurations, and properties. It explains the filling of d and f orbitals, lanthanoid and its consequences, and the differences between transition metals and inner transition metals. The book also provides tables of properties of actinoids and lanthanoids, highlighting their atomic and ionic radii, and electronic configurations.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 95, 'prompt_tokens': 1670, 'total_tokens': 1765, 'completion_time': 0.375316649, 'prompt_time': 0.061478797, 'queue_time': 0.05534779299999999, 'total_time': 0.436795446}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_dd4ae1c591', 'finish_reason': 'stop', 'logprobs': None} id='run--f4cf7d95-a884-4841-9852-4d7872ef2b03-0' usage_metadata={'input_tokens': 1670, 'output_tokens': 95, 'total_tokens': 1765}


In [68]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [None]:
def format_docs(retrived_docs):
    context_text = "\n\n".join(doc.page_content for doc in retriever_docs)
    return context_text

In [70]:
parallel_chain = RunnableParallel({
    'context': retriever | RunnableLambda(format_docs),
    'question': RunnablePassthrough()
})

In [71]:
parallel_chain.invoke("is periodic table a topic in this book. if yes elaborate it")

{'context': 'The d-block of the periodic table contains the elements\nof the groups 3-12 in which the d  orbitals are\nprogressively filled in each of the four long periods.\nThe f-block consists of elements in which 4 f and 5 f\norbitals are progressively filled . They are placed in a\nseparate panel at the bottom of the periodic table.  The\nnames transition metals  and inner transition  metals\nare often used to refer to the elements of d-and\nf-blocks respectively.\n\nThe two series of inner transition elements, lanthanoids and actinoids constitute\nthe f-block of the periodic table. With the successive filling of the inner orbitals, 4 f, there\nis a gradual decrease in the atomic and ionic sizes of these metals along the series\n(lanthanoid contraction). This has far reac hing consequences in the chemistry of the\nelements succeeding them. Lanthanum and all the lanthanoids are rather soft white\n\nsymbols and some properties of these elements are given in Table 4.10.\nTable 4.10: 

In [72]:
parser = StrOutputParser()

In [73]:
main_chain = parallel_chain | prompt | llm | parser

In [76]:
main_chain.invoke( 'can you summarize the book')

'The book discusses the d-block and f-block elements in the periodic table. The d-block elements are transition metals, and the f-block elements are inner transition metals. The f-block consists of two series: lanthanoids and actinoids. The book explains the electronic configurations, atomic and ionic radii, and properties of these elements. It also discusses the lanthanoid contraction, which has significant consequences for the chemistry of the elements that follow them in their respective periods. Additionally, the book touches on the applications of d- and f-block elements.'

In [65]:
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

In [67]:
rag_chain.invoke("is computer a topic in this book. if yes elaborate it")

{'query': 'is computer a topic in this book. if yes elaborate it',
 'result': 'No, computers are not a topic in this book. The book appears to be focused on chemistry, specifically electrochemistry, chemical kinetics, and other related topics. The context provided does not mention computers or any related technology.',
 'source_documents': [Document(id='3785df8f-a2d5-4375-9757-4a426fbf9106', metadata={'producer': 'Pdftools SDK', 'creator': 'PyPDF', 'creationdate': '', 'moddate': '2025-05-20T04:44:51+00:00', 'source': 'dataset/12th_chemistry.pdf', 'total_pages': 284, 'page': 57, 'page_label': '58'}, page_content='both these technologies are based on electrochemical principles.\nIntext QuestionsIntext QuestionsIntext QuestionsIntext QuestionsIntext Questions\n2.13 Write the chemistry of recharging the lead storage battery, highlighting\nall the materials that are involved during recharging.\n2.14 Suggest two materials other than hydrogen that can be used as fuels in\nfuel cells.\n2.15 Ex