In [65]:
import json
import os
import pickle
import re
import time

import faiss
import numpy as np
import requests
import serpapi
from jinja2 import Environment, FileSystemLoader
from sentence_transformers import SentenceTransformer
from tqdm.notebook import tqdm

In [66]:
# Sentence-embedding model; later cells reuse this module-level `model`.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Resume-writing guidelines that serve as the retrieval corpus.
ats_snippets = [
    "Use action verbs like 'Led', 'Managed', 'Developed', instead of passive phrases.",
    "Quantify your achievements, e.g., 'increased sales by 20%'.",
    "Keep resume length to one page unless you have 10+ years of experience.",
    "Tailor your resume to each job description by including relevant keywords.",
    "Use consistent formatting: bullet points, font size, spacing.",
    "Avoid vague terms like 'team player', focus on specific results.",
    "List technical skills and tools separately in a skills section.",
    "Start each bullet point with a powerful verb."
]

# Trim surrounding whitespace and discard snippets that end up empty.
chunks = [text for text in map(str.strip, ats_snippets) if text]

# Encode every snippet and convert the result to a float32 array before indexing.
embeddings = np.array(model.encode(chunks)).astype('float32')

# Build an IndexFlatL2 sized to the embedding width and add all vectors.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

# Persist the index and the snippet texts; retrieve_cv_guidelines reads both files.
faiss.write_index(index, "cv_guide.index")
with open("cv_guide_texts.pkl", "wb") as f:
    pickle.dump(chunks, f)

In [67]:
def retrieve_cv_guidelines(query_text, top_k=3):
    """Return the stored CV guideline snippets closest to ``query_text``.

    The query is embedded with the module-level ``model`` and searched against
    the FAISS index saved in ``cv_guide.index``; the snippet texts are read
    from ``cv_guide_texts.pkl``.

    Args:
        query_text: Text to find matching guidelines for.
        top_k: Maximum number of snippets to return.

    Returns:
        A list of at most ``top_k`` guideline strings, in the order returned
        by the index search. Empty when ``top_k`` is not positive or no
        snippets are stored.
    """
    query_embedding = model.encode([query_text]).astype("float32")
    index = faiss.read_index("cv_guide.index")
    # SECURITY NOTE: pickle.load can execute arbitrary code from the file.
    # Only load a cv_guide_texts.pkl that this notebook wrote itself.
    with open("cv_guide_texts.pkl", "rb") as f:
        guide_chunks = pickle.load(f)

    if top_k <= 0 or not guide_chunks:
        return []

    # Never ask for more neighbours than there are snippets: FAISS pads missing
    # results with the label -1, which would silently select guide_chunks[-1].
    k = min(top_k, len(guide_chunks))
    _, indices = index.search(query_embedding, k)
    return [guide_chunks[i] for i in indices[0] if 0 <= i < len(guide_chunks)]

In [68]:
# Sample resume payload used as input throughout this notebook.
# Downstream functions read the nested resume_json["data"]["classification"] mapping.
# NOTE(review): the contact fields below look like real personal data — confirm
# they are safe to keep in a shared notebook.
resume_json = {
    "status": "success",
    "message": "Resume extracted and classified successfully",
    "data": {
        "resumeId": "6803c9b23bfbae0fdb6f771c",
        "classification": {
            "contactInfo": {
                "name": "Anuj Singh",
                "email": "ok.anuj30@gmail.com",
                "phone": "9301783525",
                "address": "Mumbai",
                "linkedin": "linkedin.com/in/anujs101/"
            },
            "education": [
                "Bhartiya Vidya Bhavan Sardar Patel Institute of Technology (09/2023 - 08/2027), Bachelor of Technology - Computer Science and Engineering, Minor in Internet of Things (IoT)"
            ],
            "experience": [
                {
                    "role": "Head of Public Relations",
                    "organization": "Google Developer Student Club",
                    "duration": "Current",
                    "description": "Collaborated with team members to organize and promote events, leading to increased participation. Developed hands-on experience in event coordination, communication, and event promotion."
                },
                {
                    "role": "Senior Correspondent of Photography",
                    "organization": "Spark",
                    "duration": "Current",
                    "description": "Lead photography team documenting college events while coordinating visual storytelling across platforms. Collaborated with editorial team on content creation and train junior photographers on technical skills."
                }
            ],
            "projects": [
                {
                    "name": "Charcoal.AI - Generative AI Chatbot",
                    "description": "Developed an AI-powered chatbot integrating Generative AI APIs (Gemini) for secure and efficient interactions. Designed a user-friendly frontend using React, ensuring seamless user experience."
                },
                {
                    "name": "Token Launchpad",
                    "description": "Built a decentralized token launchpad enabling users to create and deploy tokens on Solana. Integrated smart contracts for token minting, fundraising (IDO/ICO), and liquidity management."
                },
                {
                    "name": "To-Do Application",
                    "description": "Built a RESTful API with Node.js and Express.js, integrating MongoDB for data storage. Built a responsive and interactive front-end using React, optimizing performance with efficient state management."
                }
            ],
            "skills": [
                "Java",
                "JavaScript",
                "C",
                "Python",
                "C++",
                "Node.js",
                "Express.js",
                "React",
                "Zod",
                "RESTful API Development",
                "API Testing (Postman)",
                "API Integration",
                "Solana dApp Development",
                "Web3.js",
                "Git/GitHub",
                "Effective communication",
                "Adaptability",
                "Team Leadership"
            ],
            "certifications": [],
            "achievements": [
                "Completed the MERN stack development cohort under the mentorship of Harkirat Singh",
                "Full Stack Development"
            ]
        },
        "isScannedDocument": False
    }
}

In [69]:
def flatten_resume_json(resume_json):
    """Flatten the classified resume JSON into a plain-text resume.

    Args:
        resume_json: Mapping that contains ``["data"]["classification"]`` with
            optional ``contactInfo``, ``education``, ``experience``,
            ``projects``, ``skills``, ``certifications`` and ``achievements``
            entries.

    Returns:
        A newline-joined string: five contact lines, followed by one block per
        non-empty section.

    Raises:
        KeyError: If ``data`` or ``classification`` is missing.

    Missing or ``None`` sections are skipped, and missing keys inside an
    experience/project entry render as empty strings instead of raising
    ``KeyError``, matching how the contact fields are already handled.
    """
    classification = resume_json["data"]["classification"]

    # Contact info: always emitted, with blanks for absent fields.
    contact = classification.get("contactInfo") or {}
    parts = [
        f"Name: {contact.get('name', '')}",
        f"Email: {contact.get('email', '')}",
        f"Phone: {contact.get('phone', '')}",
        f"Address: {contact.get('address', '')}",
        f"LinkedIn: {contact.get('linkedin', '')}",
    ]

    def add_bulleted_section(title, entries):
        """Append a titled bullet list to ``parts`` when ``entries`` is non-empty."""
        if entries:
            parts.append(f"\n{title}:")
            parts.extend(f"- {entry}" for entry in entries)

    def format_experience(exp):
        """Render one experience entry; non-dict entries are used verbatim."""
        if not isinstance(exp, dict):
            return str(exp)
        return (
            f"{exp.get('role', '')} at {exp.get('organization', '')} "
            f"({exp.get('duration', '')}): {exp.get('description', '')}"
        )

    def format_project(proj):
        """Render one project entry; non-dict entries are used verbatim."""
        if not isinstance(proj, dict):
            return str(proj)
        return f"{proj.get('name', '')}: {proj.get('description', '')}"

    add_bulleted_section("Education", classification.get("education") or [])
    add_bulleted_section(
        "Experience",
        [format_experience(exp) for exp in classification.get("experience") or []],
    )
    add_bulleted_section(
        "Projects",
        [format_project(proj) for proj in classification.get("projects") or []],
    )

    # Skills are rendered on a single comma-separated line.
    skills = classification.get("skills") or []
    if skills:
        parts.append("\nSkills: " + ", ".join(map(str, skills)))

    add_bulleted_section("Certifications", classification.get("certifications") or [])
    add_bulleted_section("Achievements", classification.get("achievements") or [])

    return "\n".join(parts)

In [70]:
def embed_resume_for_future_matching(resume_text):
    """Embed ``resume_text`` and save it as a one-vector FAISS index.

    The text is encoded with the module-level ``model``; the resulting index
    is written to ``resume_vectors.index``, replacing any existing file.
    Nothing is returned.
    """
    resume_vector = model.encode([resume_text]).astype("float32")
    vector_dim = resume_vector.shape[1]
    resume_index = faiss.IndexFlatL2(vector_dim)
    resume_index.add(resume_vector)
    faiss.write_index(resume_index, "resume_vectors.index")

In [71]:
def build_prompt(resume_json):
    """Assemble the resume-enhancement prompt for the LLM.

    The resume is flattened to text, the three closest CV guidelines are
    retrieved for it, and both are embedded in the prompt. The guideline list
    is interpolated as-is, so it appears in the prompt as a Python list.

    Side effect: the flattened resume is also embedded and written to disk via
    ``embed_resume_for_future_matching``.

    Returns:
        The prompt string.
    """
    flat_resume = flatten_resume_json(resume_json)
    guidelines = retrieve_cv_guidelines(flat_resume, top_k=3)
    embed_resume_for_future_matching(flat_resume)

    prompt = f"""
        You are a resume enhancement AI.

        From the following raw resume data and RAG context, extract and rewrite content into structured professional resume sections: About, Skills, Experience, Education, Projects, Certifications, and Achievements.

        Only return the enhanced resume content. Content should fit into a single page. Do NOT include any explanations, notes, or repeat the prompt.
        === RAG CONTEXT ===
        {guidelines}
        === Resume Input ===
        {flat_resume}

        === Enhanced Resume ===
    """
    return prompt

In [72]:
def query_groq(prompt, model="llama3-8b-8192"):
    """Send ``prompt`` to the Groq chat-completions endpoint and return the reply text.

    The API key is read from the ``GROQ_API_KEY`` environment variable so that
    no credential is stored in the notebook.

    Args:
        prompt: User message content.
        model: Groq model identifier.

    Returns:
        The ``content`` of the first choice in the response.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is not set.
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the request exceeds the timeout.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY environment variable is not set; "
            "export it before calling query_groq()."
        )

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a resume enhancer AI. Output structured resume sections only."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.7,
        "max_tokens": 1024
    }

    # Without a timeout a stalled connection would hang the notebook indefinitely.
    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]

In [73]:
def parse_enhanced_resume(resume_json):
    """Ask the LLM to enhance the resume and parse its reply into sections.

    Builds the enhancement prompt, sends it through ``query_groq`` and splits
    the reply on bold headers such as ``**Skills**``. Header matching is
    case-insensitive and tolerates a trailing colon (``**Skills:**``), so
    common variations in the LLM output are not dropped or misfiled under the
    previous section.

    Args:
        resume_json: Mapping containing ``["data"]["classification"]``.

    Returns:
        A dict with the contact fields (``name``, ``email``, ``phone``,
        ``address``, ``linkedin``), ``about`` (first line of the About
        section, or ``""``) and one list of cleaned lines for each of
        ``skills``, ``experience``, ``education``, ``projects``,
        ``certifications`` and ``achievements``.
    """
    enhancement_prompt = build_prompt(resume_json)
    raw_text = query_groq(enhancement_prompt)

    # Tolerate a missing contactInfo block, as flatten_resume_json does.
    metadata = resume_json["data"]["classification"].get("contactInfo") or {}
    section_titles = [
        "About", "Skills", "Experience", "Education", "Projects",
        "Certifications", "Achievements"
    ]

    sections = {title.lower(): [] for title in section_titles}
    current_section = None

    for line in raw_text.strip().splitlines():
        stripped = line.strip()

        # Detect section headers like **Skills**, **skills** or **Skills:**
        match = re.match(r"\*\*(.*?)\*\*", stripped)
        if match:
            header = match.group(1).strip().rstrip(":").strip().lower()
            if header in sections:
                current_section = header
                continue

        # Store content lines under the current section; text before the
        # first recognised header is ignored.
        if current_section:
            content = line.strip("•").strip("-").strip()
            if content:
                sections[current_section].append(content)

    def clean_items(items):
        """Strip leading bullet markers from each item and drop empty results."""
        cleaned = []
        for item in items or []:
            item = item.lstrip('*+•- ').strip()
            if item:
                cleaned.append(item)
        return cleaned

    # "about" gets the same bullet cleanup as the list sections.
    about_lines = clean_items(sections["about"])

    parsed_resume = {
        "name": metadata.get("name", ""),
        "email": metadata.get("email", ""),
        "phone": metadata.get("phone", ""),
        "address": metadata.get("address", ""),
        "linkedin": metadata.get("linkedin", ""),
        "about": about_lines[0] if about_lines else "",
        "skills": clean_items(sections["skills"]),
        "experience": clean_items(sections["experience"]),
        "education": clean_items(sections["education"]),
        "projects": clean_items(sections["projects"]),
        "certifications": clean_items(sections["certifications"]),
        "achievements": clean_items(sections["achievements"])
    }

    return parsed_resume

In [74]:
def render_latex(resume_json, template_path="resume_template.tex"):
    """Render the enhanced resume through a Jinja2 LaTeX template.

    The resume is enhanced and parsed with ``parse_enhanced_resume``, the
    template is loaded relative to the current directory, and the rendered
    source is written to ``resume_output.tex``.

    Args:
        resume_json: Raw resume mapping passed on to ``parse_enhanced_resume``.
        template_path: Template file name, resolved by a loader rooted at ``.``.

    Returns:
        The rendered LaTeX source as a string.
    """
    context = parse_enhanced_resume(resume_json)

    jinja_env = Environment(loader=FileSystemLoader('.'))
    latex_code = jinja_env.get_template(template_path).render(context)

    with open("resume_output.tex", "w", encoding="utf-8") as out_file:
        out_file.write(latex_code)
    return latex_code

In [75]:
def get_multiple_jobs_with_pagination(job_title, location):
    """Fetch up to five Google Jobs results from SerpAPI, following pagination.

    The API key is read from the ``SERPAPI_API_KEY`` environment variable so
    that no credential is stored in the notebook.

    Args:
        job_title: Search query.
        location: Location string passed to the search.

    Returns:
        A dict keyed by job title. Each value is a dict with ``company_name``,
        ``description`` and ``application_link``. When two results share a
        title, the later one is keyed as ``"<title> (<position>)"`` so that it
        does not overwrite the earlier one.

    Raises:
        RuntimeError: If ``SERPAPI_API_KEY`` is not set.
    """
    api_key = os.environ.get("SERPAPI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "SERPAPI_API_KEY environment variable is not set; "
            "export it before calling get_multiple_jobs_with_pagination()."
        )

    params = {
        "engine": "google_jobs",
        "q": job_title,
        "location": location,
        "api_key": api_key,
        "hl": "en",
        "gl": "in"
    }
    max_jobs = 5
    all_jobs = []
    next_page_token = None

    while len(all_jobs) < max_jobs:
        if next_page_token:
            params["next_page_token"] = next_page_token
        else:
            params.pop("next_page_token", None)

        data = serpapi.search(params)  # dict-like results object
        all_jobs.extend(data.get("jobs_results", []))

        # Pagination
        next_page_token = data.get("serpapi_pagination", {}).get("next_page_token")

        # Stop when there is no further page or enough jobs were collected;
        # this also avoids a pointless sleep before leaving the loop.
        if not next_page_token or len(all_jobs) >= max_jobs:
            break

        time.sleep(1)  # brief pause between page requests

    all_jobs = all_jobs[:max_jobs]

    result = {}
    for idx, job in enumerate(all_jobs, 1):
        if job.get('apply_options'):
            application_link = job['apply_options'][0].get('link', '')
        elif 'via' in job:
            # NOTE(review): 'via' may hold a source label rather than a URL —
            # confirm against the SerpAPI response before relying on it as a link.
            application_link = job['via']
        else:
            application_link = job.get('detected_extensions', {}).get('apply_link', '')

        key = job.get('title', f"{job_title} Opportunity {idx}")
        if key in result:
            # Same title at a different posting: keep both entries.
            key = f"{key} ({idx})"

        result[key] = {
            "company_name": job.get('company_name', ''),
            "description": job.get('description', ''),
            "application_link": application_link
        }

    return result

In [None]:
# Example search parameters for the job search.
# NOTE(review): embed_job_data is defined in this cell but no visible cell calls
# it with these values — confirm job_faiss.index is generated before match_jobs runs.
job_title = "Python Developer"
location = "India"

def embed_job_data(job_title, location):
    """Fetch jobs, embed their descriptions and save a FAISS index plus metadata.

    Writes ``job_faiss.index`` (one vector per job) and
    ``job_faiss_metadata.json`` (title, company, application link and
    description per job, in the same order as the index vectors).

    Args:
        job_title: Search query forwarded to the job search.
        location: Location forwarded to the job search.

    Raises:
        ValueError: If the search returned no jobs. Nothing is written in that
            case, so previously saved files are left untouched rather than
            failing later with an opaque indexing error.
    """
    job_descriptions_json = get_multiple_jobs_with_pagination(job_title, location)
    if not job_descriptions_json:
        raise ValueError(
            f"No jobs found for {job_title!r} in {location!r}; nothing to index."
        )

    descriptions = []
    metadata = []

    for title, data in job_descriptions_json.items():
        description = data.get("description", "")
        descriptions.append(description)

        # The description is stored in the metadata too, so matches can be
        # shown without re-querying the job search.
        metadata.append({
            "title": title,
            "company_name": data.get("company_name", ""),
            "application_link": data.get("application_link", ""),
            "description": description
        })

    # Generate embeddings (converted to float32 for FAISS)
    embeddings_np = np.array(model.encode(descriptions)).astype("float32")

    # Create the FAISS index sized to the embedding width and add all vectors
    index = faiss.IndexFlatL2(embeddings_np.shape[1])
    index.add(embeddings_np)
    faiss.write_index(index, "job_faiss.index")

    # Save metadata (with descriptions) for lookup by index position
    with open("job_faiss_metadata.json", "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2, ensure_ascii=False)


In [96]:
def match_jobs(resume_json):
    """Match the enhanced resume against the saved job index.

    Reads ``job_faiss.index`` and ``job_faiss_metadata.json``, embeds the
    enhanced resume text with the module-level ``model`` and returns the
    closest jobs.

    Args:
        resume_json: Raw resume mapping passed to ``parse_enhanced_resume``.

    Returns:
        A JSON string of the form ``{"matched_jobs": [...]}`` with up to three
        entries, each holding ``title``, ``company_name``,
        ``application_link`` and ``description``.

    Side effect: ``render_latex`` is called, which writes ``resume_output.tex``.
    It runs its own ``parse_enhanced_resume`` call, so the rendered resume
    comes from a separate LLM request than the text matched here.
    """
    enhanced_resume = parse_enhanced_resume(resume_json)
    render_latex(resume_json)

    index = faiss.read_index("job_faiss.index")

    with open("job_faiss_metadata.json", "r", encoding="utf-8") as f:
        metadata = json.load(f)

    def section_text(value):
        """Collapse a section (list or string) into a single string."""
        if isinstance(value, (list, tuple)):
            return "; ".join(str(item) for item in value if item)
        return str(value) if value else ""

    # Embed the resume *content* as text. parse_enhanced_resume returns a dict,
    # and passing that dict straight to encode() does not embed the full resume
    # (NOTE(review): sentence-transformers appears to treat a dict input as one
    # keyed text — confirm against the installed version). Contact fields are
    # left out because they carry no signal for job matching.
    content_keys = (
        "about", "skills", "experience", "education",
        "projects", "certifications", "achievements",
    )
    resume_lines = []
    for key in content_keys:
        text = section_text(enhanced_resume.get(key))
        if text:
            resume_lines.append(f"{key.capitalize()}: {text}")
    resume_text = "\n".join(resume_lines)

    # Reuse the module-level model instead of reloading it on every call.
    resume_embedding = model.encode([resume_text]).astype("float32")

    # Never request more neighbours than exist: FAISS pads missing results
    # with -1, which would otherwise select metadata[-1].
    top_k = min(3, index.ntotal, len(metadata))
    matched_jobs = []

    if top_k > 0:
        _, neighbor_ids = index.search(resume_embedding, top_k)
        for idx in neighbor_ids[0]:
            if not 0 <= idx < len(metadata):
                continue
            job = metadata[idx]
            matched_jobs.append({
                "title": job.get('title', ''),
                "company_name": job.get('company_name', ''),
                "application_link": job.get('application_link', ''),
                "description": job.get('description', '')
            })

    return json.dumps({"matched_jobs": matched_jobs}, indent=2, ensure_ascii=False)

In [97]:
def generate_learning_path(resume_json):
    """Ask the LLM for a skill-gap analysis and learning path.

    The enhanced resume (a dict, interpolated as-is) and the JSON string of
    matched jobs are placed into a career-advisor prompt that is sent through
    ``query_groq``.

    Note: ``match_jobs`` performs its own resume enhancement, so this function
    triggers several LLM requests in total.

    Returns:
        The LLM reply text.
    """
    candidate_resume = parse_enhanced_resume(resume_json)
    top_jobs_json = match_jobs(resume_json)

    advisor_prompt = f"""
        You are a career advisor AI. The following is a candidate's resume:

        --- RESUME ---
        {candidate_resume}

        These are the job descriptions of top matches:

        --- JOB DESCRIPTIONS ---
        {top_jobs_json}

        1. Identify what technical or domain-specific skills the candidate is missing.
        2. Recommend a step-by-step learning path (with topics/tools/technologies) to bridge the gap.
        3. Suggest resources (platforms or certifications) for each skill if possible.
    """

    reply = query_groq(advisor_prompt)
    return reply

In [98]:
# Generate the learning path for the sample resume. This calls the LLM and reads
# job_faiss.index / job_faiss_metadata.json through match_jobs.
learning_path = generate_learning_path(resume_json)

In [99]:
def generate_cover_letter(resume_json, selected_job_title, selected_job_description, company_name):
    """Generate a tailored cover letter for one selected job.

    Args:
        resume_json: Raw resume mapping; it is enhanced via
            ``parse_enhanced_resume`` and the resulting dict is interpolated
            into the prompt as-is.
        selected_job_title: Title of the position being applied for.
        selected_job_description: Full job description text.
        company_name: Name of the hiring company.

    Returns:
        The cover letter text returned by ``query_groq``.
    """
    candidate_resume = parse_enhanced_resume(resume_json)

    letter_prompt = f"""
        Write a personalized and professional cover letter for the position of "{selected_job_title}" at {company_name}.
        The letter should be 3-4 paragraphs, tailored to the job description below, and should highlight how the candidate's skills align with the company's requirements.

        --- Candidate's Resume ---
        {candidate_resume}

        --- Job Description ---
        {selected_job_description}

        Ensure the tone is confident, enthusiastic, and formal. Avoid generic phrases. Mention specific skills or experiences from the resume that match the job. End with a call to action and interest in an interview.
        Begin your response directly from the actual response, no need to give headers like 'Here is your generated cover letter'.
    """

    letter = query_groq(letter_prompt, model="llama3-8b-8192")
    return letter

In [100]:
# Save the cover letter to a .txt file
def save_cover_letter_to_txt(cover_letter_text, filename="cover_letter.txt"):
    """Write ``cover_letter_text`` to ``filename`` as UTF-8 and print a confirmation."""
    with open(filename, mode="w", encoding="utf-8") as out_file:
        out_file.write(cover_letter_text)
    print(f"Cover letter saved as '{filename}'")

In [None]:
# match_jobs returns a JSON *string* of the form {"matched_jobs": [...]}.
matched_jobs = match_jobs(resume_json)

{
  "matched_jobs": [
    {
      "title": "Principal Python Developer",
      "company_name": "Michael Page",
      "application_link": "https://www.michaelpage.co.in/job-detail/principal-python-developer/ref/jn-022025-6664599?utm_campaign=google_jobs_apply&utm_source=google_jobs_apply&utm_medium=organic",
      "description": "• Fast track growth & PnL ownership\n• Handling multiple clients\n\nAbout Our Client\n\nOur client is a renowned name in the industrial automation space.\n\nJob Description\n• Full Stack Development: Lead the design and development of end-to-end software solutions with a focus on server-side Python development and client-side Vue.js development.\n• Architecture and Design: Collaborate with cross-functional teams to design complex software systems meeting business requirements and performance goals.\n• Code Review and Quality Assurance: Conduct thorough code reviews to ensure code quality, adherence to best practices and consistency. Implement and enforce coding

In [103]:
# match_jobs returns a JSON string. Decode it only while it is still a string,
# so re-running this cell does not fail on an already-decoded dict.
if isinstance(matched_jobs, str):
    matched_jobs = json.loads(matched_jobs)

# Simulate user selecting a job
selected_index = 0
selected_job = matched_jobs["matched_jobs"][selected_index]

selected_title = selected_job["title"]
selected_company = selected_job["company_name"]
selected_description = selected_job["description"]

# Generate the cover letter
cover_letter = generate_cover_letter(
    resume_json=resume_json,
    selected_job_title=selected_title,
    selected_job_description=selected_description,
    company_name=selected_company
)

# Save the cover letter to a file, reusing the save_cover_letter_to_txt helper
# defined in an earlier cell instead of redefining (and shadowing) it here.
save_cover_letter_to_txt(cover_letter)