In [1]:
import os
from dotenv import load_dotenv

In [2]:
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

In [3]:
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [4]:
pip show pypdf


Name: pypdf
Version: 6.6.0
Summary: A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files
Home-page: 
Author: 
Author-email: Mathieu Fenniak <biziqe@mathieu.fenniak.net>
License-Expression: BSD-3-Clause
Location: c:\users\hp\anaconda3\envs\ques\lib\site-packages
Requires: typing_extensions
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [5]:
!pip show pypdf



Name: pypdf
Version: 6.6.0
Summary: A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files
Home-page: 
Author: 
Author-email: Mathieu Fenniak <biziqe@mathieu.fenniak.net>
License-Expression: BSD-3-Clause
Location: c:\users\hp\anaconda3\envs\ques\lib\site-packages
Requires: typing_extensions
Required-by: 


In [6]:
from langchain_community.document_loaders import PyPDFLoader


  from .autonotebook import tqdm as notebook_tqdm


In [7]:
import langchain
print(langchain.__version__)


1.2.6


In [8]:
%pwd

'c:\\Users\\Hp\\Desktop\\question and answer\\research'

In [9]:
%cd ..

c:\Users\Hp\Desktop\question and answer


In [10]:
%pwd

'c:\\Users\\Hp\\Desktop\\question and answer'

In [11]:
file_path = "data/HSC26-Bangla1st-Paper.pdf"
loader = PyPDFLoader(file_path)
data = loader.load()

In [12]:
len(data)

49

In [13]:
# Clean the PDF text for better chunking
for doc in data:
    doc.page_content = doc.page_content.replace("\n", " ").replace("\r", "").strip()

print("Sample cleaned text:\n", data[0].page_content[:500])


Sample cleaned text:
 অনলাইন ব্যাচ সম্পর্কিত যেককাকনা জিজ্ঞাাসা , অপরিরিতা আল ািয রিষ য় িাাং া ১ম পত্র


In [14]:
from langchain_text_splitters import RecursiveCharacterTextSplitter


text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=50
)

chunks = text_splitter.split_documents(data)
print(f"Total chunks created: {len(chunks)}")
print(chunks[0].page_content[:500])


Total chunks created: 172
অনলাইন ব্যাচ সম্পর্কিত যেককাকনা জিজ্ঞাাসা , অপরিরিতা আল ািয রিষ য় িাাং া ১ম পত্র


In [47]:
from langchain_huggingface import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

vectors = [embedding_model.embed_query(chunk.page_content) for chunk in chunks]




In [16]:
from langchain_community.vectorstores import FAISS


vectorstore = FAISS.from_documents(chunks, embedding_model)
print("Vectorstore created successfully.")


Vectorstore created successfully.


In [17]:
query = "অনুপমের ভাষায় সুপুরুষ কাকে বলা হয়েছে?"
top_k_docs = vectorstore.similarity_search(query, k=3)




In [18]:
for i, doc in enumerate(top_k_docs):
    print(f"Chunk {i+1}: {doc.page_content[:200]}...")

Chunk 1: অনলাইন ব্যাচ সম্পর্কিত যেককাকনা জিজ্ঞাাসা , অপরিরিতা আল ািয রিষ য় িাাং া ১ম পত্র...
Chunk 2: যস্টিনমাস্টািতাকক অনযগার়্েকতযেকত ব্লকলওযসো না। 28...
Chunk 3: অনুপকমিওর্ব্ক িআসিযথকক জফকিআসকতহকতানা।তাইব্লাো , 'অপর্ির্চতা'গকেিউশ্চদ্দষ্ট্চর্িত্রের্দ উদ্দীপককিপকিকিিমকতাহকতা,তাহকলগকেিপর্িণর্তর্ভন্নহকতা।...


In [76]:
import os
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate



llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0
)

prompt_template = """
Answer the question using ONLY the information below.
Give the exact answer without any explanation.
If the answer is not present, respond 'Answer not found'.

Context:
{text}

Question: {question}
Answer:
"""


prompt = PromptTemplate(
    input_variables=["text", "question"],
    template=prompt_template
)

chain = prompt | llm

context_text = " ".join([doc.page_content for doc in top_k_docs])




In [80]:
# User query
user_query = "কাকে অনুপমের ভাগ্য দেবতা বলে উল্লেখ করা হয়েছে?"

In [81]:
# Run the chain
result = chain.invoke({
    "text": context_text,
    "question": user_query
})

# Print grounded answer
print("Answer:", result.content)


Answer: অনুপকমিওর্ব্ক িআসিযথকক জফকিআসকতহকতানা।


Answer: Answer not found
