In [27]:
%pip install sentence-transformers faiss-cpu numpy transformers requests jinja2 tqdm serpapi

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [13]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle
import re
from tqdm.notebook import tqdm

# Sentence-embedding model; bound to the global `model`, which the cells below call for encoding.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Short ATS-oriented resume tips. In the visible notebook these are referenced only by the
# commented-out indexing code at the end of this cell, so they are not part of the live index.
ats_snippets = [
    "Use action verbs like 'Led', 'Managed', 'Developed', instead of passive phrases.",
    "Quantify your achievements, e.g., 'increased sales by 20%'.",
    "Keep resume length to one page unless you have 10+ years of experience.",
    "Tailor your resume to each job description by including relevant keywords.",
    "Use consistent formatting: bullet points, font size, spacing.",
    "Avoid vague terms like 'team player', focus on specific results.",
    "List technical skills and tools separately in a skills section.",
    "Start each bullet point with a powerful verb."
]


cv_guide_text = """
Every graduate student needs a curriculum vitae, or CV   
Your CV represents your accomplishments and experience as an academic and helps to establish your 
professional image.  Well before you apply for faculty positions, you will use your CV to apply for 
fellowships and grants, to accompany submissions for publications or conference papers, when being 
considered for leadership roles or consulting projects, and more.  CV's are also used when applying for 
some positions outside academia, such as in think tanks or research institutes, or for research positions in 
industry. 
As you progress through graduate school, you will, of course, add to your CV, but the basic areas to 
include are your contact information, education, research experience, teaching experience, publications, 
presentations, honors and awards, and contact information for your references, or those people willing to 
speak or write on your behalf.   
Some formatting pointers: 
 There is no single best format. Refer to samples for ideas, but craft your CV to best reflect you
 and your unique accomplishments.
  Unlike a resume, there is no page limit, but most graduate student's CVs are two to five pages in
 length.  Your CV may get no more than thirty seconds of the reader's attention, so ensure the
 most important information stands out. Keep it concise and relevant!
  Be strategic in how you order and entitle your categories.  The most important information
 should be on the first page.  Within each category, list items in reverse chronological order.
 Category headings influence how readers perceive you. For example, the same experience could
 belong in a category entitled: “Service to the Field,” “Conferences Organized,” or “Relevant
 Professional Experience.”
  Use active verbs and sentence fragments (not full sentences) to describe your experiences. Avoid
 pronouns (e.g. I, me), and minimize articles (a, and, the). Use a level of jargon most appropriate
 for your audience. Keep locations, dates and less important information on the right side of the
 page the left side should have important details like university, degree, job title, etc.
  Stick to a common font, such as Times New Roman, using a font size of 10 to 12 point. Use
 highlighting judiciously, favoring bold, ALL CAPS, and white space to create a crisp
 professional style.  Avoid text boxes, underlining, and shading; italics may be used in
 moderation. Margins should be equal on all four sides, and be ¾ to 1 inch in size.
  And most importantly…Follow the conventions of your field!  Different academic disciplines
 have different standards and expectations, especially in the order of categories.  Check out CVs
 from recent graduates of your department, and others in your field, to ensure you are following
 your field's norms.
 Tailor your CV to the position, purpose, or audience 
“Why should we select YOU?” - That is the question on the top of your reader's mind, so craft your CV 
to convince the reader that you have the skills, experience, and knowledge they seek. Depending on the 
purpose, you might place more or less emphasis on your teaching experience, for example. Also, keep 
an archival CV (for your eyes only!) that lists all the details of everything you've done - tailor from 
there.
"""

def _chunk_guide_text(text, sentences_per_chunk=3):
    """Split guide text into retrieval-sized chunks.

    Blank-line separated paragraphs are used when the text contains them.
    When it does not (as with `cv_guide_text`, which has no blank lines), the
    paragraph split returns the whole document as a single chunk and retrieval
    cannot discriminate between topics. In that case the text is split into
    consecutive groups of `sentences_per_chunk` sentences instead.

    Args:
        text: Raw guide text.
        sentences_per_chunk: Sentences per chunk in the fallback mode.

    Returns:
        A list of non-empty chunk strings (empty list for blank input).
    """
    paragraphs = [part.strip() for part in re.split(r'\n\s*\n', text) if part.strip()]
    if len(paragraphs) > 1:
        return paragraphs

    # Collapse hard line wraps so sentences spanning several lines stay intact.
    flattened = " ".join(text.split())
    # Split after ., ! or ? followed by whitespace, but not after "e.g." / "i.e.".
    sentences = [
        sentence
        for sentence in re.split(r'(?<!e\.g\.)(?<!i\.e\.)(?<=[.!?])\s+', flattened)
        if sentence
    ]
    return [
        " ".join(sentences[start:start + sentences_per_chunk])
        for start in range(0, len(sentences), sentences_per_chunk)
    ]


chunks = _chunk_guide_text(cv_guide_text)

# FAISS expects a float32 matrix of shape (n_chunks, embedding_dim).
embeddings = model.encode(chunks)
embeddings = np.array(embeddings).astype('float32')

# Exact (brute-force) L2 index; the row order matches the order of `chunks`.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

# Persist the index and the chunk texts side by side so they can be reloaded together.
faiss.write_index(index, "cv_guide.index")
with open("cv_guide_texts.pkl", "wb") as f:
    pickle.dump(chunks, f)

# from sentence_transformers import SentenceTransformer
# import faiss
# import numpy as np
# import pickle

# model = SentenceTransformer('all-MiniLM-L6-v2')

# embeddings = model.encode(ats_snippets)

# embeddings = np.array(embeddings).astype('float32')

# index = faiss.IndexFlatL2(embeddings.shape[1])
# index.add(embeddings)

# faiss.write_index(index, "ats_guidelines.index")
# with open("ats_texts.pkl", "wb") as f:
#     pickle.dump(ats_snippets, f)


In [14]:
def retrieve_cv_guidelines(query_text, top_k=3):
    """Return up to `top_k` CV-guide chunks closest to `query_text`.

    The index is read from "cv_guide.index" and the chunk texts from
    "cv_guide_texts.pkl"; the query is embedded with the global `model`.

    Args:
        query_text: Text to embed and search with.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings, nearest first. It may be shorter than `top_k`
        (or empty) when the index holds fewer vectors.
    """
    index = faiss.read_index("cv_guide.index")
    # NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote.
    with open("cv_guide_texts.pkl", "rb") as f:
        guide_chunks = pickle.load(f)

    # FAISS pads the result with -1 when k exceeds the number of stored vectors,
    # and -1 would silently select the *last* chunk of a Python list.
    k = min(top_k, index.ntotal, len(guide_chunks))
    if k <= 0:
        return []

    query_embedding = model.encode([query_text]).astype("float32")
    _, indices = index.search(query_embedding, k)
    return [guide_chunks[i] for i in indices[0] if 0 <= i < len(guide_chunks)]

In [15]:
resume_json = {
    "status": "success",
    "message": "Resume extracted and classified successfully",
    "data": {
        "resumeId": "6803c9b23bfbae0fdb6f771c",
        "classification": {
            "contactInfo": {
                "name": "Anuj Singh",
                "email": "ok.anuj30@gmail.com",
                "phone": "9301783525",
                "address": "Mumbai",
                "linkedin": "linkedin.com/in/anujs101/"
            },
            "education": [
                "Bhartiya Vidya Bhavan Sardar Patel Institute of Technology (09/2023 - 08/2027), Bachelor of Technology - Computer Science and Engineering, Minor in Internet of Things (IoT)"
            ],
            "experience": [
                {
                    "role": "Head of Public Relations",
                    "organization": "Google Developer Student Club",
                    "duration": "Current",
                    "description": "Collaborated with team members to organize and promote events, leading to increased participation. Developed hands-on experience in event coordination, communication, and event promotion."
                },
                {
                    "role": "Senior Correspondent of Photography",
                    "organization": "Spark",
                    "duration": "Current",
                    "description": "Lead photography team documenting college events while coordinating visual storytelling across platforms. Collaborated with editorial team on content creation and train junior photographers on technical skills."
                }
            ],
            "projects": [
                {
                    "name": "Charcoal.AI - Generative AI Chatbot",
                    "description": "Developed an AI-powered chatbot integrating Generative AI APIs (Gemini) for secure and efficient interactions. Designed a user-friendly frontend using React, ensuring seamless user experience."
                },
                {
                    "name": "Token Launchpad",
                    "description": "Built a decentralized token launchpad enabling users to create and deploy tokens on Solana. Integrated smart contracts for token minting, fundraising (IDO/ICO), and liquidity management."
                },
                {
                    "name": "To-Do Application",
                    "description": "Built a RESTful API with Node.js and Express.js, integrating MongoDB for data storage. Built a responsive and interactive front-end using React, optimizing performance with efficient state management."
                }
            ],
            "skills": [
                "Java",
                "JavaScript",
                "C",
                "Python",
                "C++",
                "Node.js",
                "Express.js",
                "React",
                "Zod",
                "RESTful API Development",
                "API Testing (Postman)",
                "API Integration",
                "Solana dApp Development",
                "Web3.js",
                "Git/GitHub",
                "Effective communication",
                "Adaptability",
                "Team Leadership"
            ],
            "certifications": [],
            "achievements": [
                "Completed the MERN stack development cohort under the mentorship of Harkirat Singh",
                "Full Stack Development"
            ]
        },
        "isScannedDocument": False
    }
}

In [16]:
def flatten_resume_json(resume_json):
    """Flatten the extracted-resume JSON into a plain-text resume.

    Reads `resume_json["data"]["classification"]` and emits the contact lines
    followed by the Education, Experience, Projects, Skills, Certifications and
    Achievements sections. Empty or missing sections are skipped. Missing
    fields inside an experience/project entry are rendered as empty strings
    (the same convention as the contact fields) instead of raising KeyError.

    Args:
        resume_json: Parsed extraction response (dict).

    Returns:
        The flattened resume as a single newline-joined string.

    Raises:
        KeyError: If "data" or "classification" is missing from the response.
    """
    classification = resume_json["data"]["classification"]

    contact = classification.get("contactInfo") or {}
    contact_fields = (
        ("Name", "name"),
        ("Email", "email"),
        ("Phone", "phone"),
        ("Address", "address"),
        ("LinkedIn", "linkedin"),
    )
    parts = [f"{label}: {contact.get(key, '')}" for label, key in contact_fields]

    def add_section(title, items):
        # Emit a blank-line separated heading followed by one "- " bullet per item.
        if items:
            parts.append(f"\n{title}:")
            parts.extend(f"- {item}" for item in items)

    def experience_line(exp):
        # Entries are normally dicts; pass anything else through as text.
        if not isinstance(exp, dict):
            return str(exp)
        return (
            f"{exp.get('role', '')} at {exp.get('organization', '')} "
            f"({exp.get('duration', '')}): {exp.get('description', '')}"
        )

    def project_line(proj):
        if not isinstance(proj, dict):
            return str(proj)
        return f"{proj.get('name', '')}: {proj.get('description', '')}"

    add_section("Education", classification.get("education"))
    add_section(
        "Experience",
        [experience_line(exp) for exp in classification.get("experience") or []],
    )
    add_section(
        "Projects",
        [project_line(proj) for proj in classification.get("projects") or []],
    )

    # Skills are rendered on a single comma-separated line rather than as bullets.
    skills = classification.get("skills")
    if skills:
        parts.append("\nSkills: " + ", ".join(str(skill) for skill in skills))

    add_section("Certifications", classification.get("certifications"))
    add_section("Achievements", classification.get("achievements"))

    return "\n".join(parts)


In [26]:
# Show the flattened text to check its layout. NOTE: the output includes personal contact details.
print(flatten_resume_json(resume_json))

Name: Anuj Singh
Email: ok.anuj30@gmail.com
Phone: 9301783525
Address: Mumbai
LinkedIn: linkedin.com/in/anujs101/

Education:
- Bhartiya Vidya Bhavan Sardar Patel Institute of Technology (09/2023 - 08/2027), Bachelor of Technology - Computer Science and Engineering, Minor in Internet of Things (IoT)

Experience:
- Head of Public Relations at Google Developer Student Club (Current): Collaborated with team members to organize and promote events, leading to increased participation. Developed hands-on experience in event coordination, communication, and event promotion.
- Senior Correspondent of Photography at Spark (Current): Lead photography team documenting college events while coordinating visual storytelling across platforms. Collaborated with editorial team on content creation and train junior photographers on technical skills.

Projects:
- Charcoal.AI - Generative AI Chatbot: Developed an AI-powered chatbot integrating Generative AI APIs (Gemini) for secure and efficient interaction

In [17]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

model = SentenceTransformer('all-MiniLM-L6-v2')


def embed_resume_for_future_matching(resume_text):
    """Embed `resume_text` and store it as a one-vector FAISS index.

    The text is encoded with the global `model`, cast to float32, added to a
    fresh `IndexFlatL2`, and written to "resume_vectors.index" (replacing any
    existing file of that name). Nothing is returned.
    """
    resume_vector = model.encode([resume_text]).astype("float32")
    embedding_dim = resume_vector.shape[1]

    resume_index = faiss.IndexFlatL2(embedding_dim)
    resume_index.add(resume_vector)

    faiss.write_index(resume_index, "resume_vectors.index")

In [18]:
# Flatten the structured resume to plain text, then use that text as the query
# for retrieving the closest CV-guide chunks (up to three).
resume_text = flatten_resume_json(resume_json)
rag_context = retrieve_cv_guidelines(resume_text, top_k=3)

In [19]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import requests

def build_prompt(resume_text, rag_context):
    """Build the resume-enhancement prompt sent to the LLM.

    Args:
        resume_text: Flattened plain-text resume.
        rag_context: Retrieved guideline text. Either a single string or an
            iterable of chunk strings; chunks are joined with blank lines so
            the model sees readable text rather than a Python list repr
            (brackets, quotes and escaped newlines).

    Returns:
        The complete prompt string.
    """
    if rag_context is None:
        context_block = ""
    elif isinstance(rag_context, str):
        context_block = rag_context
    else:
        context_block = "\n\n".join(str(chunk) for chunk in rag_context)

    return f"""
You are a resume enhancement AI.

From the following raw resume data and RAG context, extract and rewrite content into structured professional resume sections: About, Skills, Experience, Education, Projects, Certifications, and Achievements.

Only return the enhanced resume content. Content should fit into a single page. Do NOT include any explanations, notes, or repeat the prompt.
=== RAG CONTEXT ===
{context_block}
=== Resume Input ===
{resume_text}

=== Enhanced Resume ===
"""

In [20]:
import os

import requests

# SECURITY: never commit an API key with the notebook. The key is read from the
# environment; any key that was ever hardcoded in this file must be treated as
# leaked and revoked in the Groq console.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")


def query_groq(prompt, model="llama3-8b-8192"):
    """Send `prompt` to Groq's OpenAI-compatible chat endpoint and return the reply text.

    Args:
        prompt: User message content.
        model: Groq model identifier.

    Returns:
        The `content` string of the first choice in the response.

    Raises:
        RuntimeError: If no API key is configured.
        requests.RequestException: On network failure, timeout, or a non-2xx response.
    """
    # Re-check the environment so a key exported after this cell ran is still picked up.
    api_key = GROQ_API_KEY or os.environ.get("GROQ_API_KEY", "")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY is not set; export it before calling query_groq().")

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a resume enhancer AI. Output structured resume sections only."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.7,
        "max_tokens": 1024
    }

    # Without a timeout a stalled connection would block the kernel indefinitely.
    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]


In [21]:
# Assemble the enhancement prompt and send it to the LLM; `enhanced_resume` holds the raw text reply.
prompt = build_prompt(resume_text, rag_context)
enhanced_resume = query_groq(prompt)

In [22]:
def parse_enhanced_resume(raw_text):
    """Split the LLM's enhanced-resume text into per-section lists of lines.

    Recognised section titles (case-insensitive): About, Skills, Experience,
    Education, Projects, Certifications, Achievements. A line counts as a
    section header when it is one of:

    * bold markdown, optionally with a colon: ``**Skills**``, ``**Skills:**``,
      ``**Skills**:`` (text after the closing ``**`` becomes the first entry);
    * a markdown heading: ``## Skills``;
    * the bare title followed by a colon on its own line: ``Skills:``.

    Every other non-empty line is added to the current section after leading
    bullet markers (``•``, ``-``, ``–``, ``* ``, ``+ ``) are removed, whether or
    not the line is indented. Lines before the first header are ignored.

    Args:
        raw_text: Text returned by the LLM.

    Returns:
        Dict mapping each lower-cased section title to a list of content strings.
    """
    import re

    # Define section headings
    section_titles = [
        "About", "Skills", "Experience", "Education", "Projects",
        "Certifications", "Achievements"
    ]
    sections = {title.lower(): [] for title in section_titles}

    header_patterns = (
        # **Title**, **Title:**, **Title**: — optionally behind '#', optional trailing text.
        re.compile(r"^(?:#{1,6}\s*)?\*\*(?P<title>.*?)\*\*\s*:?\s*(?P<rest>.*)$"),
        # Markdown heading: "## Title" or "## Title:".
        re.compile(r"^#{1,6}\s*(?P<title>.*?)\s*:?\s*$"),
        # Plain "Title:" alone on the line.
        re.compile(r"^(?P<title>[A-Za-z]+)\s*:\s*$"),
    )
    # '*' and '+' only count as bullets when followed by whitespace, so bold
    # text such as "**Company** - role" is left intact.
    bullet_prefix = re.compile(r"^(?:[•\-–]+|[*+](?=\s))\s*")

    def match_header(stripped):
        # Return (section_key, trailing_text) for a recognised header, else None.
        for pattern in header_patterns:
            found = pattern.match(stripped)
            if found:
                key = found.group("title").strip().rstrip(":").strip().lower()
                if key in sections:
                    return key, (found.groupdict().get("rest") or "")
        return None

    def clean(text):
        return bullet_prefix.sub("", text.strip()).strip()

    current_section = None
    for line in raw_text.strip().splitlines():
        stripped = line.strip()

        header = match_header(stripped)
        if header:
            current_section, trailing = header
            trailing = clean(trailing)
            if trailing:
                sections[current_section].append(trailing)
            continue

        # Store lines under the current section
        if current_section:
            content = clean(stripped)
            if content:
                sections[current_section].append(content)

    return sections

# Contact details are taken straight from the extracted JSON rather than from the LLM reply;
# the section contents are parsed out of the LLM reply.
# NOTE: the name `metadata` is rebound to the job-metadata list in a later cell.
metadata = resume_json["data"]["classification"]["contactInfo"]
parsed_data = parse_enhanced_resume(enhanced_resume)
# resume_data = {
#     "name": metadata.get("name", ""),
#     "email": metadata.get("email", ""),
#     "phone": metadata.get("phone", ""),
#     "address": metadata.get("address", ""),
#     "linkedin": metadata.get("linkedin", ""),
#     "about": " ".join(parsed_data.get("about", [])) or "N/A",
#     "skills": parsed_data.get("skills", []),
#     "experience": [],  
#     "education": [], 
#     "projects": [],  
#     "certifications": parsed_data.get("certifications", []),
#     "achievements": parsed_data.get("achievements", []),
# }

# Template context: contact fields from the extraction metadata, followed by the
# parsed LLM sections. Key order: contact fields, "about", then the list sections.
resume_data = {
    contact_key: metadata.get(contact_key, "")
    for contact_key in ("name", "email", "phone", "address", "linkedin")
}

# "about" is collapsed to a single paragraph, with "N/A" when the section is empty.
about_text = " ".join(parsed_data.get("about", []))
resume_data["about"] = about_text if about_text else "N/A"

for section_key in ("skills", "experience", "education", "projects",
                    "certifications", "achievements"):
    resume_data[section_key] = parsed_data.get(section_key, [])


from jinja2 import Environment, FileSystemLoader

def render_latex(resume_data, template_path="resume_template.tex"):
    """Render a Jinja2 template with the resume fields and return the result.

    Args:
        resume_data: Mapping passed to the template as its context.
        template_path: Template name, looked up relative to the current directory.

    Returns:
        The rendered template text.
    """
    # NOTE(review): values are inserted as-is; if the template does not escape LaTeX
    # specials (%, &, #, _, $) itself, such characters may break compilation — confirm.
    current_dir_loader = FileSystemLoader('.')
    jinja_env = Environment(loader=current_dir_loader)
    return jinja_env.get_template(template_path).render(resume_data)

# Render the template with the assembled fields and write the result to the working directory.
latex_code = render_latex(resume_data)

with open("resume_output.tex", "w", encoding="utf-8") as f:
    f.write(latex_code)

In [29]:
import time
import serpapi
import json
def get_multiple_jobs_with_pagination(job_title, location, max_jobs=5):
    """Fetch up to `max_jobs` Google Jobs results from SerpApi, following pagination.

    Args:
        job_title: Search query.
        location: Location string passed to SerpApi.
        max_jobs: Maximum number of jobs to return.

    Returns:
        Dict keyed by job title. Each value holds "company_name", "description",
        "application_link" and "title" (the unmodified job title). When several
        jobs share a title, later ones get a disambiguated key so they are not
        overwritten.

    Raises:
        RuntimeError: If the SERPAPI_API_KEY environment variable is not set.
    """
    import os

    # SECURITY: the key is read from the environment. Any key that was ever hardcoded
    # in this notebook must be treated as leaked and revoked.
    api_key = os.environ.get("SERPAPI_API_KEY")
    if not api_key:
        raise RuntimeError("SERPAPI_API_KEY is not set; export it before searching for jobs.")

    params = {
        "engine": "google_jobs",
        "q": job_title,
        "location": location,
        "api_key": api_key,
        "hl": "en",
        "gl": "in"
    }
    all_jobs = []
    next_page_token = None

    while len(all_jobs) < max_jobs:
        if next_page_token:
            params["next_page_token"] = next_page_token
        else:
            params.pop("next_page_token", None)

        data = serpapi.search(params)   # returns SerpResults (dict-like)
        all_jobs.extend(data.get("jobs_results", []))

        # Pagination
        next_page_token = data.get("serpapi_pagination", {}).get("next_page_token")
        if not next_page_token:
            break

        time.sleep(1)  # brief pause between page requests

    result = {}
    for idx, job in enumerate(all_jobs[:max_jobs], 1):
        company_name = job.get('company_name', '')

        apply_options = job.get('apply_options') or []
        if apply_options:
            application_link = apply_options[0].get('link', '')
        elif 'via' in job:
            # NOTE(review): `via` looks like a source label rather than a URL — confirm.
            application_link = job['via']
        else:
            application_link = job.get('detected_extensions', {}).get('apply_link', '')

        actual_job_title = job.get('title', f"{job_title} Opportunity {idx}")

        # Titles are not unique across postings; keep every job instead of letting a
        # later posting overwrite an earlier one with the same title.
        key = actual_job_title
        if key in result and company_name:
            key = f"{actual_job_title} ({company_name})"
        suffix = 2
        while key in result:
            key = f"{actual_job_title} ({suffix})"
            suffix += 1

        result[key] = {
            "title": actual_job_title,
            "company_name": company_name,
            "description": job.get('description', ''),
            "application_link": application_link
        }

    return result


In [30]:
# Step 1: Fetch job postings for the target role
job_title = "Python Developer"
location = "India"
job_descriptions_json = get_multiple_jobs_with_pagination(job_title, location)

# Fail early with a clear message: with no jobs there is nothing to embed, and the
# index construction below would otherwise fail in a much less obvious place.
if not job_descriptions_json:
    raise RuntimeError(f"No jobs returned for {job_title!r} in {location!r}; nothing to index.")

# Step 2: Load embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Step 3: Extract descriptions and metadata
# `descriptions[i]` and `metadata[i]` describe the same job, and row i of the index.
descriptions = []
metadata = []

for title, data in job_descriptions_json.items():
    descriptions.append(data["description"])
    metadata.append({
        "title": title,
        "company_name": data["company_name"],
        "application_link": data["application_link"]
    })

# Step 4: Generate embeddings
embeddings = model.encode(descriptions)
embeddings_np = np.array(embeddings).astype("float32")  # FAISS requires float32

# Step 5: Create FAISS index and add embeddings
dimension = embeddings_np.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings_np)

# Optional: Save FAISS index
faiss.write_index(index, "job_faiss.index")

# Step 6: Save metadata for lookup
with open("job_faiss_metadata.json", "w") as f:
    json.dump(metadata, f, indent=2)


In [43]:
# Reload the job index and its metadata from the files written by the previous cell.
index = faiss.read_index("job_faiss.index")
with open("job_faiss_metadata.json", "r") as f:
    metadata = json.load(f)

# Load embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

In [44]:
# Match jobs against the LLM-enhanced resume text rather than the raw flattened resume.
user_resume_text = enhanced_resume

# Encode as a one-item batch and cast to float32 for the FAISS search in the next cell.
resume_embedding = model.encode([user_resume_text]).astype("float32")

In [45]:
top_k = 3
# D holds the distances and I the matching row ids, nearest first.
D, I = index.search(resume_embedding, top_k)

print("Top Matching Jobs:\n")
for idx in I[0]:
    # FAISS pads with -1 when the index holds fewer than `top_k` jobs; without this
    # check metadata[-1] would silently print the last job again.
    if idx < 0:
        continue
    job = metadata[idx]
    print(f"{job['title']} at {job['company_name']}\nLink: {job['application_link']}\n")

Top Matching Jobs:

smartSense Consulting Solutions - Python Developer - Server Side Component at smartSense Consulting Solutions
Link: https://in.linkedin.com/jobs/view/smartsense-consulting-solutions-python-developer-server-side-component-at-smartsense-consulting-solutions-4206287397?utm_campaign=google_jobs_apply&utm_source=google_jobs_apply&utm_medium=organic

Junior Python Developer at Dehazelabs
Link: https://jobs.ashbyhq.com/dehazelabs/3fc045c8-c43d-4523-b258-80e35b2930ed?utm_campaign=google_jobs_apply&utm_source=google_jobs_apply&utm_medium=organic

Python+Snowflake Developer - SA/M - GO/JC/21441/2025 at Golden Opportunities
Link: https://in.linkedin.com/jobs/view/python%2Bsnowflake-developer-sa-m-go-jc-21441-2025-at-golden-opportunities-4204686847?utm_campaign=google_jobs_apply&utm_source=google_jobs_apply&utm_medium=organic



In [46]:
# Concatenate the matched jobs as "<title>:\n<description>" blocks. FAISS pads missing
# neighbours with -1; skip those so a negative index does not silently pull in the
# last job's description a second time.
job_desc_text = "\n\n".join([metadata[i]["title"] + ":\n" + descriptions[i] for i in I[0] if i >= 0])

# Skill-gap / learning-path prompt built from the enhanced resume and the matched job descriptions.
rag_prompt = f"""
You are a career advisor AI. The following is a candidate's resume:

--- RESUME ---
{user_resume_text}

These are the job descriptions of top matches:

--- JOB DESCRIPTIONS ---
{job_desc_text}

1. Identify what technical or domain-specific skills the candidate is missing.
2. Recommend a step-by-step learning path (with topics/tools/technologies) to bridge the gap.
3. Suggest resources (platforms or certifications) for each skill if possible.
"""

In [47]:
# Ask the LLM for the skill-gap analysis and learning path requested in `rag_prompt`.
learning_path = query_groq(rag_prompt)

In [51]:
def generate_cover_letter(user_resume, selected_job_title, selected_job_description, company_name):
    """Ask the LLM for a cover letter tailored to one job.

    Args:
        user_resume: Resume text inserted into the prompt.
        selected_job_title: Title of the position being applied for.
        selected_job_description: Job description inserted into the prompt.
        company_name: Name of the hiring company.

    Returns:
        Whatever `query_groq` returns for the assembled prompt.
    """
    llm_model = "llama3-8b-8192"
    cover_letter_prompt = f"""
Write a personalized and professional cover letter for the position of "{selected_job_title}" at {company_name}.
The letter should be 3-4 paragraphs, tailored to the job description below, and should highlight how the candidate's skills align with the company's requirements.

--- Candidate's Resume ---
{user_resume}

--- Job Description ---
{selected_job_description}

Ensure the tone is confident, enthusiastic, and formal. Avoid generic phrases. Mention specific skills or experiences from the resume that match the job. End with a call to action and interest in an interview.
Begin your response directly from the actual response, no need to give headers like 'Here is your generated cover letter'.
"""
    return query_groq(cover_letter_prompt, model=llm_model)


In [52]:
def save_cover_letter_to_txt(cover_letter_text, filename="cover_letter.txt"):
    """Write the cover letter to `filename` as UTF-8 text and print a confirmation.

    Args:
        cover_letter_text: Text to write.
        filename: Destination path; an existing file is overwritten.
    """
    out_file = open(filename, "w", encoding="utf-8")
    try:
        out_file.write(cover_letter_text)
    finally:
        out_file.close()
    print("Cover letter saved as '{}'".format(filename))

In [53]:
# Let's say the user selects job index 1 (second from search results)
# `I` comes from the index search above; I[0][1] is the row id of the second-nearest job.
# NOTE(review): FAISS reports -1 when there is no second match, and -1 would index the
# last entry of `metadata` / `descriptions` — confirm at least two jobs were indexed.
selected_index = I[0][1]
selected_job = metadata[selected_index]

selected_title = selected_job.get("title", "Python Developer")  # fallback
# `descriptions` is the in-memory list built when the index was created; it is assumed to
# still be in the same order as the reloaded `metadata`.
selected_description = descriptions[selected_index]
selected_company = selected_job["company_name"]

# Generate the letter from the enhanced resume and the selected posting.
cover_letter = generate_cover_letter(
    user_resume=user_resume_text,
    selected_job_title=selected_title,
    selected_job_description=selected_description,
    company_name=selected_company
)

# Writes to the default "cover_letter.txt" in the working directory.
save_cover_letter_to_txt(cover_letter)


Cover letter saved as 'cover_letter.txt'
