In [1]:
import os
import google.generativeai as genai
from langchain.document_loaders import TextLoader                                           
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from dotenv import load_dotenv

# Load settings from a local .env file before anything reads the environment;
# presumably this is where GOOGLE_API_KEY (looked up via os.getenv in the next
# cell) comes from — confirm against the project's .env.
load_dotenv()

True

In [2]:
# Hand the Gemini SDK its API key, taken from the GOOGLE_API_KEY environment variable.
genai.configure(
    api_key=os.environ.get("GOOGLE_API_KEY"),
)

In [3]:
# 1. Load and process the resume
# Read the plain-text resume file into LangChain documents.
resumeLoader = TextLoader("HariSamynaath_resume_20250210.txt")
resumeDocs = resumeLoader.load()

# Cut the documents into overlapping chunks; this splitter is reused for the
# profile write-up in the next cell.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
ResumeTexts = text_splitter.split_documents(resumeDocs)

In [4]:
# Apply the same load-and-chunk steps to the longer profile write-up.
writeupLoader = TextLoader("HariSamynaath_profile_writeup.txt")
writeupDocs = writeupLoader.load()

# Reuse the splitter configured for the resume so both sources are chunked alike.
writeupTexts = text_splitter.split_documents(
    writeupDocs,
)

In [5]:
# Combine the chunks from both sources into a brand-new list.
# Aliasing ResumeTexts and extending it in place would also grow ResumeTexts,
# and every re-run of this cell would append the write-up chunks once more;
# building a fresh list keeps the cell idempotent and the inputs untouched.
texts = [*ResumeTexts, *writeupTexts]

In [6]:
def myWrap(text, width=80):
    """Greedily word-wrap ``text`` so that lines stay within ``width`` characters.

    Each line of the input (as returned by ``str.splitlines``) is wrapped on
    its own, so existing line breaks and blank lines are preserved. Runs of
    whitespace inside a line collapse to single spaces.

    Args:
        text: The string to wrap.
        width: Maximum number of characters per output line. A single word
            longer than ``width`` is never split; it is placed alone on its
            own line.

    Returns:
        The wrapped text, with output lines joined by ``"\\n"``.
    """
    wrapped_lines = []

    for line in text.splitlines():
        current = ""
        for word in line.split():
            if not current:
                # First word on the output line: take it as-is. This avoids
                # both a leading space being counted against the width and an
                # empty line being emitted ahead of an over-long word.
                current = word
            elif len(current) + 1 + len(word) <= width:
                # +1 accounts for the separating space.
                current = f"{current} {word}"
            else:
                wrapped_lines.append(current)
                current = word

        # Flush the last partial line; for a blank input line this appends ""
        # so that paragraph breaks survive.
        wrapped_lines.append(current)

    return '\n'.join(wrapped_lines)

In [7]:
# Preview every chunk: its wrapped content first, then the raw character
# count followed by a short divider.
for text in texts:
    myStr = text.model_dump()['page_content']
    print(myWrap(myStr))
    print(f"\n{len(myStr)} ", "=" * 10, sep=" ", end="\n\n")

Hari Samynaath S
Chennai, India | +91 967 789 7321 | harinaathan@gmail.com |
https://www.linkedin.com/in/harinaathan/

-----------------------------------------------------------------------------------------------------------------------------------------------------------

Summary
Data Scientist with 10+ years of progressive experience in manufacturing,
procurement, and data science. Proven ability to leverage data analysis, machine
learning, and process improvement to drive cost savings and enhance efficiency.
Developed high-impact Power BI dashboards with 1000+ monthly views and top 100
company-wide ranking. Currently developing a predictive model projected to save
$0.35 million annually. Experienced in remote work. Adept at leading teams and
managing projects. Seeking a Data Science Leadership role.


Skills
Data Science: Machine Learning, Deep Learning, Predictive Modeling, Time Series
Analysis, Data Visualization, NLP, TF-IDF, Matrix Decomposition.
Tools: Power BI, Azure ML, Dev

In [None]:
# Total number of chunks collected from the resume and the write-up.
len(texts)

In [8]:
from sentence_transformers import SentenceTransformer

class SentenceTransformerEmbeddings:
    """Adapter exposing a SentenceTransformer model via ``embed_documents`` / ``embed_query``."""

    def __init__(self, model_name="all-mpnet-base-v2"):
        """Create the underlying ``SentenceTransformer`` for ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        """Encode ``texts`` and return the encoder output converted with ``.tolist()``."""
        encoded = self.model.encode(texts, convert_to_tensor=False)
        return encoded.tolist()

    def embed_query(self, text):
        """Encode one ``text`` (as a one-item batch) and return its single embedding."""
        encoded = self.model.encode([text], convert_to_tensor=False)
        as_list = encoded.tolist()
        return as_list[0]

In [9]:
# Embed with the local sentence-transformers model.
embeddings = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
# Hosted alternative, currently disabled:
# embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [10]:
# Give every chunk a deterministic id (source path + position in `texts`).
# Without explicit ids each run stores the chunks under fresh random ids, so
# re-running the notebook against the persisted ./chroma_db keeps piling up
# duplicate chunks that then crowd the retriever's results. With stable ids
# the store is expected to overwrite the existing entries instead.
# NOTE: entries left over from an earlier run with more chunks are not removed.
chunk_ids = [
    f"{doc.metadata.get('source', 'doc')}-{idx}"
    for idx, doc in enumerate(texts)
]
db = Chroma.from_documents(
    texts,
    embeddings,
    ids=chunk_ids,
    persist_directory="./chroma_db",  # local persistence
)

In [11]:
# Gemini chat model used to write the answers.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001")

# Retriever over the Chroma store, using its default settings.
retriever = db.as_retriever()

# RetrievalQA chain wiring the retriever and the LLM together (chain_type "stuff").
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [12]:
def ask_question(question):
    """Send ``question`` through ``qa_chain`` and return the ``'result'`` entry of its response."""
    return qa_chain.invoke(question)['result']


In [13]:
# Sample questions that are later put to the QA chain one by one.
questions = [
    "What are your key skills?",
    "What are your professional experiences?",
    "What projects have you worked on?",
    "What is your educational background?"
]

# Print every entry returned by genai.list_models().
# NOTE(review): looks like a leftover availability check, unrelated to the
# questions above — confirm whether it is still needed.
for model in genai.list_models():
    print(model)

In [15]:
# Ask each sample question and print it together with the chain's answer,
# leaving a blank line between entries.
for question in questions:
    answer = ask_question(question)
    print(f"Question: {question}", f"Answer: {answer}\n", sep="\n")

Question: What are your key skills?
Answer: My key skills include:

*   **Data Science:** Machine Learning (Supervised/Unsupervised), Deep Learning (CNN, RNN, Transformers), Feature Engineering, Feature Selection, Model Development, Model Tuning, Data Visualization, Time Series Analysis, Predictive Modeling, Statistical Analysis, Image Processing, Natural Language Processing (NLP), TF-IDF, Matrix Decomposition.
*   **Data Science Tools/Technologies:** Power BI, Azure Machine Learning, Azure DevOps, Jupyter, VS Code, SQL (Snowflake), Python, YAML, Power Query, DAX.
*   **Programming Languages:** Python, SQL, YAML
*   **Prior Experience with Tools/Technologies:** Excel (VBA, Macros), MATLAB, Hyperworks Optistruct, ProE, C/C++.
*   **Other Skills:** Project Management, Process Improvement, Cost Optimization, Negotiation, Strategic Sourcing, Vendor Management, Cross-functional Collaboration, Communication (written and verbal), Problem Solving, Leadership, Mentoring, Team Management, Remote