In [None]:
import os
from dotenv import load_dotenv
from pymongo import MongoClient

load_dotenv()

# MongoDB connection settings. Each value comes from the environment variable
# of the same name and is None when that variable is not set.
MONGO_URI = os.environ.get("MONGO_URI")
DB_NAME = os.environ.get("DB_NAME")
COLLECTION_NAME = os.environ.get("COLLECTION_NAME")
VECTOR_INDEX = os.environ.get("VECTOR_INDEX")

In [8]:
# Build the MongoDB client from the configured URI, then resolve the database
# and the collection the rest of the notebook reads from and writes to.
client = MongoClient(MONGO_URI)
database = client[DB_NAME]
collection = database[COLLECTION_NAME]

In [9]:
# Connectivity check: issue a ping against the admin database.
# On failure the error is printed rather than raised, so the cell still completes.
try:
    client.admin.command("ping")
except Exception as e:
    print(e)
else:
    print("Pinged your deployment. You successfully connected to MongoDB!")

Pinged your deployment. You successfully connected to MongoDB!


In [10]:
# ingest documents
# Read the resume PDF through LangChain's PyPDFLoader.
from langchain_community.document_loaders import PyPDFLoader

# Relative path, so it is resolved against the kernel's working directory.
# Also stored as the "source" field of every record inserted into MongoDB below.
file_path = "Swapnil_Resume_Nov.pdf"

loader = PyPDFLoader(file_path)
docs = loader.load()
# Bare last expression so the notebook renders the loaded Document list.
# NOTE(review): the rendered output includes personal contact details from the
# PDF; clear the cell output before sharing or committing the notebook.
docs

[Document(metadata={'producer': 'Microsoft® Word 2019', 'creator': 'Microsoft® Word 2019', 'creationdate': '2025-03-26T10:09:11+05:30', 'author': 'Austin, Heather', 'moddate': '2025-03-26T10:09:11+05:30', 'source': 'Swapnil_Resume_Nov.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='Swapnil Katiyar  \n+91-7503533658     •     swapnil240695@gmail.com    •     Noida, India \nFront-End Developer \nNext.js    |    React.js    |    JavaScript (ES6+)    |    Git   |    Jira \nPassionate Front-End Developer skilled in HTML, CSS, JavaScript, React, and Next.js, with hands-on experience in \nbuilding responsive and interactive web applications. Strong focus on delivering high-quality, maintainable code and \ncrafting exceptional user experiences through collaboration and innovation. \nPROFESSIONAL SKILLS \n• Frontend Development: HTML5, CSS3, JavaScript, TypeScript, React, Next.js \n• Frameworks/Libraries: Redux, Material-UI, React Hook Form, Tailwind CSS \n• Version Control

In [None]:
# clean documents
import re


def clean_text(text: str) -> str:
    """Normalise text extracted from a PDF page.

    Removes ``Page <number>`` markers, collapses every run of consecutive
    newlines into a single newline, and strips leading/trailing whitespace.

    Args:
        text: Raw page text.

    Returns:
        The cleaned text.
    """
    # Remove page markers *before* collapsing newlines: deleting a marker that
    # sits on its own line leaves two adjacent newlines behind, and the
    # collapse step below is what merges them back into one.
    text = re.sub(r"Page \d+", "", text)
    text = re.sub(r"\n+", "\n", text)
    return text.strip()


# Overwrite each document's text in place. The splitter cell reads `docs`
# directly, so the cleaned text only reaches the chunks through this assignment.
for doc in docs:
    cleaned = clean_text(doc.page_content)
    doc.page_content = cleaned
    print(doc.page_content)

Swapnil Katiyar  
+91-7503533658     •     swapnil240695@gmail.com    •     Noida, India 
Front-End Developer 
Next.js    |    React.js    |    JavaScript (ES6+)    |    Git   |    Jira 
Passionate Front-End Developer skilled in HTML, CSS, JavaScript, React, and Next.js, with hands-on experience in 
building responsive and interactive web applications. Strong focus on delivering high-quality, maintainable code and 
crafting exceptional user experiences through collaboration and innovation. 
PROFESSIONAL SKILLS 
• Frontend Development: HTML5, CSS3, JavaScript, TypeScript, React, Next.js 
• Frameworks/Libraries: Redux, Material-UI, React Hook Form, Tailwind CSS 
• Version Control & Collaboration: Git, Jira 
• Testing: React Testing Library, Jest, Vitest 
WORK EXPERIENCE 
Treeroot Informatics – Ahmedabad, Gujarat, India January 2023 – April 2024 
Front End Developer 
• Worked closely with a team of 8 developers to deliver high-quality front-end features for projects, utilizing 
React and 

In [7]:
# Split the loaded documents into overlapping chunks for embedding.
from langchain_text_splitters import RecursiveCharacterTextSplitter

CHUNK_SIZE = 800  # upper bound on the size of a single chunk
CHUNK_OVERLAP = 100  # amount of text shared between neighbouring chunks

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = splitter.split_documents(docs)
chunks

[Document(metadata={'producer': 'Microsoft® Word 2019', 'creator': 'Microsoft® Word 2019', 'creationdate': '2025-03-26T10:09:11+05:30', 'author': 'Austin, Heather', 'moddate': '2025-03-26T10:09:11+05:30', 'source': 'Swapnil_Resume_Nov.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='Swapnil Katiyar  \n+91-7503533658     •     swapnil240695@gmail.com    •     Noida, India \nFront-End Developer \nNext.js    |    React.js    |    JavaScript (ES6+)    |    Git   |    Jira \nPassionate Front-End Developer skilled in HTML, CSS, JavaScript, React, and Next.js, with hands-on experience in \nbuilding responsive and interactive web applications. Strong focus on delivering high-quality, maintainable code and \ncrafting exceptional user experiences through collaboration and innovation. \nPROFESSIONAL SKILLS \n• Frontend Development: HTML5, CSS3, JavaScript, TypeScript, React, Next.js \n• Frameworks/Libraries: Redux, Material-UI, React Hook Form, Tailwind CSS \n• Version Control

In [11]:
# create embeddings and insert into vector database
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

# load_dotenv() at the top of the notebook has already placed the .env values
# in os.environ, so nothing needs to be copied here; just fail early with a
# readable message when the key is missing.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or environment."
    )

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Embed every chunk with one batched call instead of one request per chunk.
vectors = embeddings.embed_documents([chunk.page_content for chunk in chunks])

# One record per chunk: the text, its vector, and enough metadata to trace the
# chunk back to its page and position in the source file.
records = [
    {
        "text": chunk.page_content,
        "embedding": vector,
        "metadata": {
            "page": chunk.metadata.get("page"),
            "source": file_path,
            "chunk_id": i,
        },
    }
    for i, (chunk, vector) in enumerate(zip(chunks, vectors))
]

# Keep the cell idempotent: drop whatever an earlier run stored for this file
# so that re-running does not leave duplicate chunks in the collection.
collection.delete_many({"metadata.source": file_path})

# insert_many rejects an empty list, so skip the write when there are no chunks.
if records:
    collection.insert_many(records)

In [14]:
# Retrieve the stored chunks closest to the question via Atlas vector search.

query = "List all the professional skills"

# The question is embedded with the same model used for the stored chunks.
query_embedding = embeddings.embed_query(query)

# Stage 1: vector search over the "embedding" field using the configured index.
vector_search_stage = {
    "$vectorSearch": {
        "index": VECTOR_INDEX,
        "path": "embedding",
        "queryVector": query_embedding,
        "numCandidates": 100,
        "limit": 4,
    }
}

# Stage 2: return only the text, metadata and similarity score of each match
# (the _id and the stored embedding vector are left out).
projection_stage = {
    "$project": {
        "_id": 0,
        "text": 1,
        "metadata": 1,
        "score": {"$meta": "vectorSearchScore"},
    }
}

pipeline = [vector_search_stage, projection_stage]

results = list(collection.aggregate(pipeline))
results

[{'text': 'and reliable code integration across the team. \nSharpener Tech- Bangalore, Karnataka, India                                                   January 2022 – January 2023 \nStudent \n• Acquired proficiency in HTML, JavaScript, CSS, React, Material-UI, Redux, and Firebase during a hands-on \nwork focused on front-end development technologies. \n• Mentored 8+ aspiring developers, offering guidance and support to accelerate their skill development. \n• Completed diverse self-learning projects, showcasing practical skills and a commitment to improvement. \nEconstruct Design and Build Pvt. Ltd.- Bangalore, Karnataka, India        June 2019 – January 2022 \nStructural Design Engineer \n• Applied structural analysis and design software to ensure accuracy and compliance with industry standards.',
  'metadata': {'page': 0, 'source': 'Swapnil_Resume_Nov.pdf', 'chunk_id': 2},
  'score': 0.6708278059959412},
 {'text': '• Testing: React Testing Library, Jest, Vitest \nWORK EXPERIENCE \nT

In [None]:
# Print the text of every retrieved chunk, numbered from 1, one blank line apart.
for i, res in enumerate(results):
    line = f"Result {i+1}: {res['text']}\n"
    print(line)

Result 1: and reliable code integration across the team. 
Sharpener Tech- Bangalore, Karnataka, India                                                   January 2022 – January 2023 
Student 
• Acquired proficiency in HTML, JavaScript, CSS, React, Material-UI, Redux, and Firebase during a hands-on 
work focused on front-end development technologies. 
• Mentored 8+ aspiring developers, offering guidance and support to accelerate their skill development. 
• Completed diverse self-learning projects, showcasing practical skills and a commitment to improvement. 
Econstruct Design and Build Pvt. Ltd.- Bangalore, Karnataka, India        June 2019 – January 2022 
Structural Design Engineer 
• Applied structural analysis and design software to ensure accuracy and compliance with industry standards.

Result 2: • Testing: React Testing Library, Jest, Vitest 
WORK EXPERIENCE 
Treeroot Informatics – Ahmedabad, Gujarat, India January 2023 – April 2024 
Front End Developer 
• Worked closely with a team

In [32]:
# Answer the question with the LLM, grounded in the retrieved chunks.
llm = ChatOpenAI(model="gpt-4o", temperature=0, streaming=True)

# Collect the text of each retrieved chunk, then merge them into one string
# (chunks separated by a blank line) that is pasted into the prompt as context.
contexts = [hit["text"] for hit in results]
context_text = "\n\n".join(contexts)

prompt = f"""
You are a helpful assistant.
Answer ONLY using the context below.
If the answer is not found, say "Not found in the document."

Context:
{context_text}

Question:
{query}
"""

# TODO: explore whether a retrieval chain and ChatPromptTemplate can replace
# the hand-built prompt above.

# The model streams its reply in pieces; stitch them into the full answer.
answer = llm.stream(prompt)
full_answer = "".join(piece.content for piece in answer)

print(full_answer)

• Frontend Development: HTML5, CSS3, JavaScript, TypeScript, React, Next.js
• Frameworks/Libraries: Redux, Material-UI, React Hook Form, Tailwind CSS
• Version Control & Collaboration: Git, Jira
• Testing: React Testing Library, Jest, Vitest
