In [1]:
import warnings
# Silence every Python warning for the rest of the kernel session. Note that
# this also hides any deprecation notices emitted by later cells.
warnings.filterwarnings('ignore')


import os
from pathlib import Path
from dotenv import load_dotenv
import pandas as pd
import json 
from typing import List, Optional
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
#from langchain_core.pydantic_v1 import BaseModel, Field
from pydantic import BaseModel, Field

from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.document_loaders import UnstructuredWordDocumentLoader
from langchain.document_loaders import Docx2txtLoader

# Populate the process environment via python-dotenv; GEMINI_API_KEY is later
# read from os.environ when the LLM client is built.
load_dotenv()

True

In [18]:
# Sample inputs live in `sample_data`, two directory levels above the current
# working directory (i.e. relative to where the kernel was started).
data_dir = Path.cwd().parent.parent.joinpath('sample_data')

In [32]:
def load_resume_from_pdf(pdf_path):
    """Extract the text of a PDF resume as a single string.

    Args:
        pdf_path: Location of the PDF file, given as a ``str`` or any object
            whose ``str()`` is the path (e.g. ``pathlib.Path``).

    Returns:
        The ``page_content`` of every document returned by
        ``PyPDFLoader(...).load()``, in order, separated by newlines.
        An empty string if the loader returns no documents.
    """
    # Normalise to str so callers may pass pathlib.Path objects without
    # depending on whether the installed loader accepts path-like arguments.
    loader = PyPDFLoader(str(pdf_path))
    # A newline between pages keeps the last word of one page from being fused
    # with the first word of the next; join also avoids repeated `+=` copies.
    return "\n".join(page.page_content for page in loader.load())

def load_resume_from_docx(docx_path):
    """Extract the text of a Word (.docx) resume as a single string.

    Args:
        docx_path: Location of the document, given as a ``str`` or any object
            whose ``str()`` is the path (e.g. ``pathlib.Path``).

    Returns:
        The ``page_content`` of every document returned by
        ``UnstructuredWordDocumentLoader(...).load()``, in order, separated by
        newlines. An empty string if the loader returns no documents.
    """
    # Normalise to str so callers do not have to convert pathlib.Path objects
    # themselves before calling this helper.
    loader = UnstructuredWordDocumentLoader(str(docx_path))
    # If the loader yields several documents, separate them with a newline so
    # adjacent text is not fused; a single document is returned unchanged.
    return "\n".join(part.page_content for part in loader.load())

In [26]:
# Prompt sent to the LLM. Placeholders:
#   {text}                - resume text
#   {jd}                  - job description text
#   {format_instructions} - output-schema instructions; without this
#                           placeholder a `format_instructions` value bound on
#                           the PromptTemplate is never rendered into the prompt.
# Items 8 and 9 request the two ResumeScore fields the original list omitted.
prompt_template = """
Act as an advanced Applicant Tracking System (ATS) with extensive expertise in the tech industry, specifically in software engineering, data science, data analysis, and big data engineering. Your task is to meticulously evaluate the provided resume against the given job description.

Consider the following:
1. The job market is highly competitive, so provide detailed, actionable feedback to improve the resume.
2. Analyze the resume for both hard skills (technical abilities) and soft skills (interpersonal qualities).
3. Consider the candidate's experience level and how it aligns with the job requirements.
4. Evaluate the resume's formatting and structure for ATS compatibility.
5. Assess the clarity and impact of the candidate's achievements and responsibilities.

Please provide the following:
1. An accurate percentage match between the resume and the job description.
2. A comprehensive list of matching keywords and skills found in both the resume and job description.
3. A comprehensive list of missing keywords and skills from the job description.
4. Suggestions for improving the resume to better match the job description.
5. A brief profile summary of the candidate based on their resume.
6. Top 3 strengths of the candidate relevant to the job description.
7. Top 3 areas for improvement or skills to develop.
8. An ATS compatibility score (0-100) for the resume's formatting and structure.
9. An assessment of how well the candidate's experience level matches the job requirements.

Resume: {text}
Job Description: {jd}

{format_instructions}
"""

class ResumeScore(BaseModel):
    # Structured result that the LLM reply is parsed into (used as the
    # `pydantic_object` of the PydanticOutputParser).
    # Documented with `#` comments rather than a class docstring because
    # pydantic copies a model docstring into the generated schema, which would
    # change the runtime text derived from this class.

    # --- overall fit ---
    perc_match: int = Field(
        description="Percentage match between the job description and the resume (0-100)",
    )

    # --- keyword analysis ---
    matching_keywords: List[str] = Field(
        description="List of important keywords or skills found in both the resume and job description",
    )
    missing_keywords: List[str] = Field(
        description="List of important keywords or skills from the job description missing in the resume",
    )

    # --- feedback for the candidate ---
    improvement_suggestions: List[str] = Field(
        description="Specific suggestions to improve the resume for better alignment with the job description",
    )
    profile_summary: str = Field(
        description="Concise profile summary of the candidate based on their entire resume",
    )
    top_strengths: List[str] = Field(
        description="Top 3 strengths of the candidate relevant to the job description",
    )
    areas_for_improvement: List[str] = Field(
        description="Top 3 areas for improvement or skills to develop for better job fit",
    )

    # --- formatting and seniority ---
    ats_compatibility_score: int = Field(
        description="Score for how well the resume is formatted for ATS systems (0-100)",
    )
    experience_level_match: str = Field(
        description="Assessment of how well the candidate's experience level matches the job requirements",
    )

In [3]:
# Gemini client used both as the chain's model and by the output-fixing parser.
# The API key is read from the environment; a missing GEMINI_API_KEY raises KeyError.
llm = GoogleGenerativeAI(
    google_api_key=os.environ["GEMINI_API_KEY"],
    model="gemini-1.5-pro",
)

In [27]:
# Strict parser: turns the model's reply into a ResumeScore instance.
parser = PydanticOutputParser(pydantic_object=ResumeScore)
# Wrap the strict parser together with the same LLM; per LangChain's
# OutputFixingParser this is meant to let the LLM repair replies that fail to parse.
new_parser = OutputFixingParser.from_llm(llm=llm, parser=parser)

# `text` and `jd` are supplied on every call; `format_instructions` is bound
# once here from the parser.
prompt = PromptTemplate(
    input_variables=["text", "jd"],
    partial_variables={"format_instructions": new_parser.get_format_instructions()},
    template=prompt_template,
)

# Pipeline: render prompt -> call the LLM -> parse the reply.
llm_chain = prompt | llm | new_parser

In [19]:
# First sample: the PDF resume shipped in the sample-data directory.
resume_path = data_dir.joinpath('resume.pdf')
resume_text = load_resume_from_pdf(resume_path)

In [28]:
# Read the job description used for every evaluation below. The encoding is
# pinned so decoding does not depend on the platform's locale default
# (assumes jd1.txt is UTF-8 encoded - TODO confirm).
with open(data_dir / 'jd1.txt', encoding='utf-8') as f:
    jd = f.read()

In [29]:
# Evaluate the currently loaded resume text against the job description.
out = llm_chain.invoke(dict(text=resume_text, jd=jd))

In [38]:
# Second sample: the Word-format resume. This rebinds `resume_path`,
# `resume_text` and `out`, replacing the values from the PDF run.
resume_path = data_dir.joinpath('software-engineer-resume.docx')
resume_text = load_resume_from_docx(str(resume_path))

out = llm_chain.invoke(dict(text=resume_text, jd=jd))

In [39]:
# Display the parsed result of the most recent chain invocation (the DOCX
# resume when cells are run top to bottom).
out

ResumeScore(perc_match=45, matching_keywords=['Python', 'JavaScript', 'API', 'Database Theory', 'MongoDB', 'MySQL', 'Git', 'Agile', 'Debugging', 'Monitoring'], missing_keywords=['MLOps', 'Real-time ML/LLM deployment & evaluation', 'Large Language Models (LLMs)', 'RAG frameworks', 'Agentic workflows', 'Real-time data processing/monitoring tools', 'Observability best practices for LLMs', 'LLM management/optimization platforms (LangChain, Hugging Face)', 'Data engineering pipelines for real-time input-output logging/analysis', 'Experience building production-quality ML/AI systems'], improvement_suggestions=['Rewrite Summary: Focus on skills relevant to the job description. Quantify achievements and highlight experience related to AI/ML, data processing, or any tangential experience. Remove generic phrases like "profit-oriented" and focus on technical strengths.', 'Add Skills Section: Create a dedicated "Skills" section listing all relevant keywords, both technical and soft. Include progra