In [60]:
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter, CharacterTextSplitter

In [61]:
# Load the EduTrack FAQ PDF into a list of LangChain documents.
# The cell's displayed value is the number of documents returned by the loader.
faq_loader = PyPDFLoader(file_path="data/EduTrack_FAQ_assignment.pdf")
pdf_docs = faq_loader.load()
len(pdf_docs)

2

In [62]:
# Split the loaded documents into chunks of at most 360 characters, with a
# 50-character overlap between neighbouring chunks, then display the result.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=360,
    chunk_overlap=50,
)
chunked_docs = splitter.split_documents(pdf_docs)
chunked_docs

[Document(metadata={'producer': 'ReportLab PDF Library - www.reportlab.com', 'creator': '(unspecified)', 'creationdate': '2025-06-05T12:53:08+00:00', 'author': '(anonymous)', 'keywords': '', 'moddate': '2025-06-05T12:53:08+00:00', 'subject': '(unspecified)', 'title': '(anonymous)', 'trapped': '/False', 'source': 'data/EduTrack_FAQ_assignment.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='EduTrack – Frequently Asked Questions\nQ1: What is EduTrack used for?\nA1: EduTrack helps educational institutions monitor student engagement, analyze learning\nbehavior, and proactively support at-risk learners through data-driven insights.\nQ2: Which platforms does EduTrack integrate with?'),
 Document(metadata={'producer': 'ReportLab PDF Library - www.reportlab.com', 'creator': '(unspecified)', 'creationdate': '2025-06-05T12:53:08+00:00', 'author': '(anonymous)', 'keywords': '', 'moddate': '2025-06-05T12:53:08+00:00', 'subject': '(unspecified)', 'title': '(anonymous)', 'trapped

In [63]:
# Embedding function backed by an Ollama-served model; the model tag is kept
# in its own variable so it can be changed in one place.
embedding_model_name = "nomic-embed-text:latest"
embedding = OllamaEmbeddings(model=embedding_model_name)

In [64]:
# Build the Chroma vector store from the chunks and persist it to disk.
#
# Every chunk is given a deterministic id (source path + position in the chunk
# list). Without explicit ids the store assigns a fresh random UUID per chunk,
# so each re-run of this cell appends another full copy of the corpus to the
# persisted collection and similarity searches start returning duplicates.
# With stable ids, a re-run overwrites the existing entries instead.
#
# NOTE: entries written by earlier runs (random ids, or higher chunk indices
# left over after changing the chunking parameters) are not removed — delete
# the persist directory once to rebuild the collection from scratch.
chunk_ids = [
    f"{doc.metadata.get('source', 'doc')}-chunk-{idx}"
    for idx, doc in enumerate(chunked_docs)
]
db = Chroma.from_documents(
    chunked_docs,
    embedding,
    ids=chunk_ids,
    persist_directory="./chrome_db",  # on-disk location of the collection
)
db

# To reopen the persisted store in a later session without re-embedding:
# db = Chroma(embedding_function=embedding, persist_directory="./chrome_db")

<langchain_community.vectorstores.chroma.Chroma at 0x16cc9d760>

In [65]:
# Query the vector store directly and display the matching chunks.
res = db.similarity_search(
    query="Is EduTrack beneficial for teachers as well?",
)
res

[Document(metadata={'title': '(anonymous)', 'trapped': '/False', 'source': 'data/EduTrack_FAQ_assignment.pdf', 'moddate': '2025-06-05T12:53:08+00:00', 'producer': 'ReportLab PDF Library - www.reportlab.com', 'total_pages': 2, 'author': '(anonymous)', 'page': 0, 'subject': '(unspecified)', 'creationdate': '2025-06-05T12:53:08+00:00', 'creator': '(unspecified)', 'page_label': '1', 'keywords': ''}, page_content='Q5: How does EduTrack benefit teachers?\nA5: Instructors receive weekly summaries, alerts about disengaged students, and tools to\nsend personalized feedback or motivational nudges.\nQ6: Can students access their own dashboards?\nA6: Yes. Students can view their own learning progress, receive AI-generated tips, and'),
 Document(metadata={'page_label': '1', 'source': 'data/EduTrack_FAQ_assignment.pdf', 'total_pages': 2, 'creationdate': '2025-06-05T12:53:08+00:00', 'keywords': '', 'title': '(anonymous)', 'trapped': '/False', 'page': 0, 'author': '(anonymous)', 'moddate': '2025-06-05

In [66]:
# Display the vector store object created earlier in the notebook.
db

<langchain_community.vectorstores.chroma.Chroma at 0x16cc9d760>

In [67]:
# Retrieve option
retr = db.as_retriever()
retr.invoke("Is EduTrack beneficial for teachers as well?")[0]

Document(metadata={'page': 0, 'title': '(anonymous)', 'subject': '(unspecified)', 'creationdate': '2025-06-05T12:53:08+00:00', 'producer': 'ReportLab PDF Library - www.reportlab.com', 'source': 'data/EduTrack_FAQ_assignment.pdf', 'creator': '(unspecified)', 'author': '(anonymous)', 'total_pages': 2, 'moddate': '2025-06-05T12:53:08+00:00', 'page_label': '1', 'keywords': '', 'trapped': '/False'}, page_content='Q5: How does EduTrack benefit teachers?\nA5: Instructors receive weekly summaries, alerts about disengaged students, and tools to\nsend personalized feedback or motivational nudges.\nQ6: Can students access their own dashboards?\nA6: Yes. Students can view their own learning progress, receive AI-generated tips, and')