In [13]:
import warnings
warnings.filterwarnings('ignore')

import google.generativeai as genai
import os
from pathlib import Path
from dotenv import load_dotenv
import pandas as pd
import json 
from typing import List, Optional
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
#from langchain_core.pydantic_v1 import BaseModel, Field
from pydantic import BaseModel, Field

from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.document_loaders import UnstructuredWordDocumentLoader
from langchain.document_loaders import Docx2txtLoader

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Hand the Gemini key to the google.generativeai SDK.
# os.environ[...] raises KeyError here if GEMINI_API_KEY is not set.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

In [5]:
# Helper to convert a PDF resume to plain text
def parse_resume_from_pdf(pdf_path) -> str:
    """Extract the text of a PDF resume as a single string.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``PyPDFLoader``.

    Returns:
        The ``page_content`` of every loaded page, in order, joined with a
        newline. An empty string when the loader yields no pages.
    """
    pages = PyPDFLoader(pdf_path).load()
    # Join with a newline: plain concatenation glued the last word of one page
    # onto the first word of the next, corrupting text at page boundaries.
    return "\n".join(page.page_content for page in pages)

# parse_resume_from_pdf("/nfs/home/scg1143/ATSChecker/sample_data/resume.pdf")

In [10]:
# The dataset is expected in a `data` directory next to the current working directory.
data_dir = Path.cwd().parent.joinpath('data')
# lines=True: the file is read as JSON Lines (one JSON record per line).
resume_data = pd.read_json(data_dir.joinpath('resume_data.json'), lines=True)

In [6]:
# Direct google.generativeai handle for the "models/gemini-1.5-pro" model.
model = genai.GenerativeModel("models/gemini-1.5-pro")

In [165]:
# LangChain wrapper for "gemini-1.5-pro"; the key is read from the environment (KeyError if unset).
llm = GoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=os.environ["GEMINI_API_KEY"])

In [106]:
# Reference: https://github.com/611noorsaeed/Resume-Parsing-with-Google-Gemini-LLM-Automated-Resume-Parsing-Form-Filling-with-AI/blob/main/app.py
# Prompt with two placeholders:
#   {format_instructions} - the output-schema instructions produced by the output parser
#   {resume}              - the raw resume text supplied at invoke time
# Without {format_instructions} in the template, a `format_instructions` partial
# variable passed to PromptTemplate is never rendered and the model is not told
# which JSON structure to return.
prompt_template = """
    You are a resume parsing assistant. 
    Given the following resume text, extract all the important details like name, contact information/email,
    education, work experience, skills, year of experience, suggested_resume_category and recommendeded_job_roles
    If any detail is not found, just skip that and don't provide any reasoning for anything.
    
    {format_instructions}
    
    The resume text:
    {resume}
    """

In [157]:
# One educational qualification; both fields are optional and default to None.
class EducationEntry(BaseModel):
    degree: Optional[str] = Field(
        default=None,
        description="Name of the degree obtained, e.g., Bachelor of Science, Master of Engineering.",
    )
    institution: Optional[str] = Field(
        default=None,
        description="Name of the institution where the degree was pursued.",
    )

# Candidate skills split into two string lists; each defaults to a new empty list.
class SkillsEntry(BaseModel):
    technical: List[str] = Field(
        default_factory=list,
        description="Technical Skills",
    )
    non_technical: List[str] = Field(
        default_factory=list,
        description="Non-Technical or Soft Skills",
    )

# One job held by the candidate. Every field is optional (default None);
# start/end dates are kept as plain strings rather than date objects.
class WorkExperienceEntry(BaseModel):
    company: Optional[str] = Field(
        default=None,
        description="Name of the company candidate has worked at",
    )
    role: Optional[str] = Field(
        default=None,
        description="Job role at the company",
    )
    start_date: Optional[str] = Field(
        default=None,
        description="Start date of the Job.",
    )
    end_date: Optional[str] = Field(
        default=None,
        description="End date of the Job.",
    )

# Top-level schema for a parsed resume. Every field has a default, so a value
# missing from the LLM output does not fail validation.
# NOTE(review): documented with `#` comments rather than a class docstring;
# presumably pydantic would copy a docstring into the JSON schema and thereby
# change the generated format instructions. Confirm before adding one.
class ResumeContent(BaseModel):
    name: Optional[str] = Field(default=None, description="Name of the candidate")
    skills: Optional[SkillsEntry] = Field(default_factory=SkillsEntry, description="Technical and Non-Technical Skills of the candidate")
    # Description lists only the fields EducationEntry actually defines (degree,
    # institution); it previously also promised "dates", which the schema lacks.
    education: List[EducationEntry] = Field(default_factory=list, description="List of educational qualifications, where each entry contains details about degree and institution.")
    work_experience: List[WorkExperienceEntry] = Field(default_factory=list, description="List of work experiences, where each experience include company name, role, start date and end date.")
    suggested_resume_category: Optional[str] = Field(default=None, description="Suggested Resume Category (based on the skills and experience)")
    # Field name keeps its original spelling: it is part of the schema that callers and the prompt use.
    recommendeded_job_roles: List[str] = Field(default_factory=list, description="Recommended Job Roles (based on the candidate's skills and experience)")
    years_of_experience: Optional[int] = Field(default=None, description="Years of Experience, calculated from work experience")

In [170]:
# Parser bound to the ResumeContent schema.
parser = PydanticOutputParser(pydantic_object=ResumeContent)
# Wrapper built from that parser and the LLM
# (presumably re-asks the LLM to repair output that fails to parse; confirm in the LangChain docs).
new_parser = OutputFixingParser.from_llm(llm=llm, parser=parser)

# `resume` is supplied on every call; `format_instructions` is bound once, here.
prompt = PromptTemplate(
    input_variables=["resume"],
    partial_variables={"format_instructions": new_parser.get_format_instructions()},
    template=prompt_template,
)

In [171]:
# Pipeline: render the prompt, send it to the LLM, then parse the reply with new_parser.
chain = prompt | llm | new_parser

In [160]:
# Parse the resume stored at row position 10 and display the structured result.
resume_content = resume_data["content"].iloc[10]
output = chain.invoke({"resume": resume_content})
output

ResumeContent(name='Asish Ratha', skills=SkillsEntry(technical=['Invoice', 'posting', 'Invoice processing', 'sap posting', 'vendor call attend and resolve the issue', 'meet SLA tat', 'working with client tool'], non_technical=['TRAINING', 'Team handling', 'new joiners training']), education=[EducationEntry(degree=None, institution='Berhampur university, Khallikote autonomous college - Brahmapur, Orissa')], work_experience=[WorkExperienceEntry(company='Accenture', role='Subject matter Expert', start_date='March 2012', end_date='Present')], suggested_resume_category='Accounting/Finance, Customer Service, Operations', recommendeded_job_roles=['Subject Matter Expert', 'Invoice Processing Specialist', 'Trainer', 'Team Lead', 'Client Support Specialist'], years_of_experience=11)

In [163]:
# Degree of the first education entry of the current `output`;
# raises IndexError when `education` is an empty list.
output.education[0].degree

In [166]:
# Parse the resume stored at row position 20 and display the structured result.
resume_content = resume_data["content"].iloc[20]
output = chain.invoke({"resume": resume_content})
output

ResumeContent(name='Govardhana K', skills=SkillsEntry(technical=['APEX', 'Data Structures', 'FLEXCUBE', 'Oracle', 'Algorithms', 'Core Java', 'Go Lang', 'Oracle PL-SQL programming', 'Sales Force with APEX', 'RADTool', 'Jdeveloper', 'NetBeans', 'Eclipse', 'SQL developer', 'PL/SQL Developer', 'WinSCP', 'Putty', 'JavaScript', 'XML', 'HTML', 'Webservice', 'Linux', 'Windows', 'SVN', 'GitHub', 'Web logic', 'OC4J'], non_technical=[]), education=[EducationEntry(degree='B.E in Computer Science Engineering', institution='Adithya Institute of Technology, Tamil Nadu')], work_experience=[WorkExperienceEntry(company='Cloud Lending Solutions', role='Senior Software Engineer', start_date='January 2018', end_date='Present'), WorkExperienceEntry(company='Oracle', role='Senior Consultant', start_date='November 2016', end_date='December 2017'), WorkExperienceEntry(company='Oracle', role='Staff Consultant', start_date='January 2014', end_date='October 2016'), WorkExperienceEntry(company='Oracle', role='Asso

In [172]:
# Parse the resume stored at row position 25 and display the structured result.
resume_content = resume_data["content"].iloc[25]
output = chain.invoke({"resume": resume_content})
output

ResumeContent(name='Jay Madhavi', skills=SkillsEntry(technical=['SQL', 'Oracle', 'Android Application Development', 'Code Optimization', 'Website Development'], non_technical=[]), education=[EducationEntry(degree='BE (Computer Science)', institution='Saraswati College Of Engineering, Kharghar - Mumbai, Maharashtra'), EducationEntry(degree='HSC', institution='Acharya College Chembur - Mumbai, Maharashtra'), EducationEntry(degree='SSC', institution='State Board')], work_experience=[WorkExperienceEntry(company='NIIT', role=None, start_date='2016', end_date='2016'), WorkExperienceEntry(company='SQL Oracle', role=None, start_date='2016', end_date='2016'), WorkExperienceEntry(company='MSCIT', role=None, start_date='2011', end_date='2011')], suggested_resume_category='Entry Level/ Fresher', recommendeded_job_roles=[], years_of_experience=None)

In [174]:
# Parse the resume stored at row position 100 and display the structured result.
resume_content = resume_data["content"].iloc[100]
output = chain.invoke({"resume": resume_content})
output

ResumeContent(name='Yasothai Jayaramachandran', skills=SkillsEntry(technical=['Automation', 'Regression', 'Sanity', 'Manual Testing', 'Selenium Webdriver', 'Python', 'Pycharm', 'Eclipse', 'Pydev plugin', 'TCL Programming', 'Agile development', 'Scrum Master', 'optimization', 'router', 'Testing', 'Power Query', 'SQL', 'Shell Scripting', 'C#', 'Networking', 'WAAS', 'WAE', 'WCCP Protocol', 'ATS', 'Selenium Testing Framework', 'CDETS', 'Rally', 'eARMS', 'ACME', 'TIMS', 'HTMLTestRunner', 'ARAS PLM', 'AML Studio', 'Windows XP/10', 'Linux'], non_technical=[]), education=[EducationEntry(degree='B.E in CSE', institution='ANNA University'), EducationEntry(degree=None, institution='Day Adventist Matric Hr.Sec School')], work_experience=[WorkExperienceEntry(company='Cisco Systems', role='Lead Engineer - Automation & Testing', start_date='January 2014', end_date='June 2015'), WorkExperienceEntry(company='Cisco Systems', role='Member Technical Staff - Automation & Testing', start_date='January 2013'