In [1]:
import openai
from dotenv import load_dotenv, find_dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.callbacks import get_openai_callback
import pandas as pd
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from json.decoder import JSONDecodeError
import pypdf
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
import os
from dotenv import load_dotenv, find_dotenv
from langchain.callbacks import get_openai_callback


In [2]:
def prepare_LLM(llm_model = "gpt-3.5-turbo", temperature = 0.0):
    """Build the chat model client used to evaluate resumes.

    Args:
        llm_model: Name of the OpenAI chat model to request.
        temperature: Sampling temperature passed to ``ChatOpenAI``. The default
            of 0.0 is the value this function previously hard-coded.

    Returns:
        A ``ChatOpenAI`` instance configured with the given model and temperature.
    """
    return ChatOpenAI(temperature=temperature, model=llm_model)


In [4]:
# Pull variables from a .env file into the process environment, then look up
# the OpenAI key (None when the variable is not set).
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")



In [21]:
# Load one sample PDF resume for interactive testing.
# NOTE(review): the next cell reassigns both `loader` and `resume`, so only the
# cell that ran last determines what `resume` holds.
loader = PyPDFLoader("test_folder/Resume (1).pdf")
resume = loader.load_and_split()



In [21]:
# Load one sample .docx resume for interactive testing; this reassigns the
# `loader` and `resume` names that the PDF cell above also sets.
loader = Docx2txtLoader("resume_doctest.docx")
resume = loader.load()

[Document(page_content='Josue Martinez\n\nNew Jersey, NJ | 201-969-7937 | Josuelm070@gmail.com\n\nhttps://www.linkedin.com/in/josue-martinez1/\n\n\n\nSUMMARY \n\nHighly motivated, strategic-minded, and results-driven Business Development Specialist and Acquisitions Specialist with versatile Residential Real Estate underwriting, marketing, and client relationship management expertise. Experienced in high-load, stress, and deadline-driven environments. Proven capacity to utilize superb communication skills along with a stellar ability to quickly build rapport with team members and clients and efficiently partake in a firm’s operation working towards growth, continuous improvement, organizational enhancement, and overall success.\n\nRELEVANT EXPERIENCE \n\nLeverage Companies, Newark, NJ \t\t\t\t\t         03/2023 - 08/2023\n\nBusiness Development Consultant\n\nDrove over $500,000 in expansion revenue with our new agent outreach strategy.\n\nDeveloped processes for lead generation leading 

In [18]:
# Create the chat model using prepare_LLM's default arguments.
chat = prepare_LLM()

In [30]:
def prepare_template_string():
    
    template_string = """
    You are acting as a recruiting assistant. Your job is to carefully scan resumes and \
    identify the best candidates for the role. In order to do this, you will first need to \
    understand the job description, the required skills/experience, and the desired \
    skills\experience. You will be asked to first extract relevant information from each resume, \
    and then to rank the candidates fit on a scale from 1 - 10. 
 
    
    The job description, desired level of education, and desired skills/experience are delineated \
    by the triple backticks below. The candidate resume will be provided after this, and format instructions \
    will also be provided.
    
    ``` 
    Job description: {job_description}, Desired education level: {desired_education}, Desired experience: {desired_experience}, Desired skills: {desired_skills}
    ```
    
    Candidate resume: {resume}
    
    {format_instructions}
    """
    return template_string

In [9]:
def prepare_chat_schemas():
    """Define the fields the model must return for each resume.

    Returns:
        A 3-tuple ``(response_schemas, output_parser, format_instructions)``:
        the list of ``ResponseSchema`` objects, the ``StructuredOutputParser``
        built from them, and that parser's format-instruction text.
    """
    # (field name, instruction shown to the model) pairs, in output order.
    field_specs = [
        ("name",
         "Simply state the name of the candidate."),
        ("contact",
         "Simply copy the candidate's email address here."),
        ("relevant_experience",
         "Based upon the job description, please extract any relevant experience from the resume and "
         "state it here. This experience should be relevant to the job description and the desired experience/skills. "),
        ("relevant_skills",
         "Based upon the job description, please extract any relevant skills from the resume and "
         "state it here. These skills should be relevant to the job description and the desired experience/skills."),
        ("relevant_education",
         "Based upon the job description, please extract information about relevant education from the resume and "
         "state it here. This information about education should be relevant to the job description and the desired level of education."),
        ("candidate_score",
         "Here, you will rate the candidate on a scale from 1 - 10--with a 10 being a perfect fit (matches desired  "
         " perfectly). This should be a single number response only."),
        ("score_justification",
         "Here, you will justify your score, highlighting specific parts of the information that you extracted to "
         " make your case. This should be a brief, 1 - 5 sentence description for why this score was assigned that cites specific "
         " attributes from the candidate resume and the job description/desired qualifications."),
    ]

    response_schemas = [
        ResponseSchema(name=field_name, description=field_description)
        for field_name, field_description in field_specs
    ]
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    return response_schemas, output_parser, output_parser.get_format_instructions()


In [31]:
# Build the prompt template text that is formatted for each resume.
template_string = prepare_template_string()

In [11]:
# Build the response schemas, the parser for model replies, and the format
# instructions that are inserted into the prompt.
response_schemas, output_parser, format_instructions = prepare_chat_schemas()

In [12]:
# Wrap the template text in a ChatPromptTemplate so its placeholders can be filled.
prompt = ChatPromptTemplate.from_template(template=template_string)


In [29]:
# Hiring criteria that are substituted into the prompt template.
desired_education = "Not specified, but at least an undergraduate degree."
desired_experience = "The candidate should have at least two years experience in a related field."
desired_skills = "Not specified, but should be relevant to the job description"

# Adjacent string literals inside the parentheses are joined into one string.
job_description = (
    "Top Tier: Someone with technical Sales Experience + Recruiting experience + worked in school systems;  "
    "Middle Tier: Someone who has technical sales experience + worked in recruiting (but no school experience); "
    " Low Tier: Only has one of the three thing (like has sales experience but no recruiting or schools experience). "
    "For this job, there are three main areas of qualification: technical sales experience, recruiting experience, and experience "
    " in school systems."
)


In [22]:
# Fill every placeholder in the prompt template. desired_skills has to be passed
# as well: the template references {desired_skills}, so leaving it out makes
# formatting fail with a missing-variable error.
messages = prompt.format_messages(
    resume=resume,
    desired_education=desired_education,
    desired_experience=desired_experience,
    desired_skills=desired_skills,
    job_description=job_description,
    format_instructions=format_instructions,
)

In [23]:
# Send the formatted prompt to the model. invoke() is used because calling the
# chat model object directly is deprecated in LangChain.
response = chat.invoke(messages)


In [24]:
response

AIMessage(content='```json\n{\n\t"name": "Kevin J. Grimes",\n\t"contact": "kjgrimes@alaska.edu",\n\t"relevant_experience": "University of Alaska Fairbanks Office of Admissions Communication/Foster Youth Entry Coordinator, Communication Student Assistant, Millions.co Sports Entertainment Company Key Social Monitor/Ambassador/Moderator",\n\t"relevant_skills": "Data entry, communication, content creation, social media management",\n\t"relevant_education": "University of Alaska Fairbanks, Occupational Endorsement Certificate in Supervision and Personnel Management (2023-present); University of Alaska Fairbanks, Associate’s Degree in Applied Business w/Concentration in Management (2021-present); University of Alaska Fairbanks, Bachelor of Arts in History, Minor in Communication, Class of 2022; University of Alaska Fairbanks, Occupational Endorsement Certificate in Content Creation, Class of 2022",\n\t"candidate_score": "7",\n\t"score_justification": "Kevin has relevant experience in communi

In [25]:
# Parse the model's reply text into a dict using the structured output parser.
output_dict = output_parser.parse(response.content)

In [26]:
output_dict

{'name': 'Kevin J. Grimes',
 'contact': 'kjgrimes@alaska.edu',
 'relevant_experience': 'University of Alaska Fairbanks Office of Admissions Communication/Foster Youth Entry Coordinator, Communication Student Assistant, Millions.co Sports Entertainment Company Key Social Monitor/Ambassador/Moderator',
 'relevant_skills': 'Data entry, communication, content creation, social media management',
 'relevant_education': 'University of Alaska Fairbanks, Occupational Endorsement Certificate in Supervision and Personnel Management (2023-present); University of Alaska Fairbanks, Associate’s Degree in Applied Business w/Concentration in Management (2021-present); University of Alaska Fairbanks, Bachelor of Arts in History, Minor in Communication, Class of 2022; University of Alaska Fairbanks, Occupational Endorsement Certificate in Content Creation, Class of 2022',
 'candidate_score': '7',
 'score_justification': 'Kevin has relevant experience in communication, content creation, and social media m

In [29]:
# Pull the evaluation fields out of the parsed reply; a key that is absent
# from output_dict yields the placeholder 'unknown'.
(relevant_experience,
 relevant_skills,
 relevant_education,
 candidate_score,
 score_justification) = (
    output_dict.get(field, 'unknown')
    for field in ('relevant_experience',
                  'relevant_skills',
                  'relevant_education',
                  'candidate_score',
                  'score_justification')
)

In [31]:
print(candidate_score, score_justification)

7 The candidate has relevant experience in technical sales and recruiting, as well as a relevant educational background. They possess a variety of skills that align with the desired qualifications. However, they do not have specific experience in school systems, which lowers their score slightly.


In [41]:
%pip install comtypes

Collecting comtypes
  Downloading comtypes-1.2.1-py2.py3-none-any.whl.metadata (3.8 kB)
Downloading comtypes-1.2.1-py2.py3-none-any.whl (184 kB)
   ---------------------------------------- 0.0/184.9 kB ? eta -:--:--
   ---------------------------------------- 184.9/184.9 kB 5.6 MB/s eta 0:00:00
Installing collected packages: comtypes
Successfully installed comtypes-1.2.1
Note: you may need to restart the kernel to use updated packages.


In [11]:
%pip install docx2txt

Collecting docx2txt
  Downloading docx2txt-0.8.tar.gz (2.8 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: docx2txt
  Building wheel for docx2txt (pyproject.toml): started
  Building wheel for docx2txt (pyproject.toml): finished with status 'done'
  Created wheel for docx2txt: filename=docx2txt-0.8-py3-none-any.whl size=3972 sha256=64817549fb2f08cf39eba9a91e69def5f41106d68baa14f95836280bf60b6a36
  Stored in directory: c:\users\johan\appdata\local\pip\cache\wheels\22\58\cf\093d0a6c3ecfdfc5f6ddd5524043b88e59a9a199cb02352966
Successfully built docx2txt
Installing collected packages: docx2txt
Successfully installed docx2txt-0.8
Note: you may need to restart the kernel t

In [41]:
def process_resume(resume, chat, output_parser, template_string, format_instructions, desired_education, desired_experience, desired_skills, job_description):
    """Evaluate a single resume against the job requirements with the chat model.

    Args:
        resume: Resume content substituted for ``{resume}`` in the template.
            Callers in this notebook pass the object returned by a document loader.
        chat: Chat model; it is called through ``invoke`` with the formatted messages.
        output_parser: Parser whose ``parse`` method turns the reply text into a dict.
        template_string: Prompt template containing the placeholders filled below.
        format_instructions: Text substituted for ``{format_instructions}``.
        desired_education: Text substituted for ``{desired_education}``.
        desired_experience: Text substituted for ``{desired_experience}``.
        desired_skills: Text substituted for ``{desired_skills}``.
        job_description: Text substituted for ``{job_description}``.

    Returns:
        A 7-tuple ``(name, contact, relevant_experience, relevant_skills,
        relevant_education, candidate_score, score_justification)``. Any key
        missing from the parsed reply is returned as ``'unknown'``.
    """
    prompt = ChatPromptTemplate.from_template(template=template_string)
    messages = prompt.format_messages(resume=resume,
                                      desired_education=desired_education,
                                      desired_experience=desired_experience,
                                      desired_skills=desired_skills,
                                      job_description=job_description,
                                      format_instructions=format_instructions)

    # invoke() replaces calling the chat model object directly, which LangChain
    # has deprecated.
    response = chat.invoke(messages)
    output_dict = output_parser.parse(response.content)

    # Order here defines the order of the returned tuple.
    field_names = ('name',
                   'contact',
                   'relevant_experience',
                   'relevant_skills',
                   'relevant_education',
                   'candidate_score',
                   'score_justification')
    return tuple(output_dict.get(field, 'unknown') for field in field_names)

    

In [33]:
# process_resume returns seven values and requires the four job-criteria
# arguments, so all of them are passed and unpacked here.
(candidate_name,
 candidate_contact,
 relevant_experience,
 relevant_skills,
 relevant_education,
 candidate_score,
 score_justification) = process_resume(
    resume=resume,
    chat=chat,
    output_parser=output_parser,
    template_string=template_string,
    format_instructions=format_instructions,
    desired_education=desired_education,
    desired_experience=desired_experience,
    desired_skills=desired_skills,
    job_description=job_description,
)

  warn_deprecated(


In [34]:
print(candidate_score, score_justification)

7 The candidate has relevant experience in business development, acquisitions, and underwriting, which aligns with the desired skills/experience. However, there is no mention of experience in school systems or recruiting. Therefore, the candidate receives a score of 7, indicating a good fit but not a perfect match.


In [42]:
def process_folder(resume_directory, chat, output_parser, template_string, format_instructions, desired_education, desired_experience, desired_skills, job_description):
    """Evaluate every .docx and .pdf resume in a directory.

    Each supported file is loaded with the matching document loader and passed
    to ``process_resume``; files with any other extension are reported and
    skipped. After each processed file the number of supported files still
    remaining is printed.

    Args:
        resume_directory: Path of the directory containing the resume files.
        chat, output_parser, template_string, format_instructions,
        desired_education, desired_experience, desired_skills, job_description:
            Forwarded unchanged to ``process_resume``.

    Returns:
        A pandas DataFrame with one row per processed file and the columns
        ``file, name, contact, experience, skills, education, score,
        justification``. The frame is empty (columns only) when no supported
        file is found.
    """
    columns = ['file', 'name', 'contact', 'experience', 'skills',
               'education', 'score', 'justification']
    supported_suffixes = ('.docx', '.pdf')

    # List the directory once and sort it so the row order does not depend on
    # the order os.listdir happens to return.
    filenames = sorted(os.listdir(resume_directory))

    # Count only files that will actually be processed, so the countdown
    # reaches 0 even when the directory contains unsupported files.
    total_remaining = sum(
        1 for entry in filenames if entry.lower().endswith(supported_suffixes)
    )

    rows = []
    for filename in filenames:
        filepath = os.path.join(resume_directory, filename)
        # Compare extensions case-insensitively so e.g. "CV.PDF" is not skipped.
        lowered = filename.lower()
        if lowered.endswith('.docx'):
            resume = Docx2txtLoader(filepath).load()
        elif lowered.endswith('.pdf'):
            resume = PyPDFLoader(filepath).load_and_split()
        else:
            print(f"Unsupported file format for {filename}. Skipping.")
            continue

        name, contact, experience, skills, education, score, justification = process_resume(
            resume, chat, output_parser, template_string, format_instructions,
            desired_education, desired_experience, desired_skills, job_description
        )
        rows.append({
            'file': filename,
            'name': name,
            'contact': contact,
            'experience': experience,
            'skills': skills,
            'education': education,
            'score': score,
            'justification': justification,
        })
        total_remaining -= 1
        print(total_remaining)

    # Passing columns explicitly keeps the column set and order fixed even
    # when no rows were collected.
    results_df = pd.DataFrame(rows, columns=columns)
    return results_df

In [46]:
# Process every resume in the test folder while the OpenAI callback records
# token usage and cost, then print that usage summary.
resume_directory = "small_test"
with get_openai_callback() as cb:
    result_df = process_folder(
        resume_directory=resume_directory,
        chat=chat,
        output_parser=output_parser,
        template_string=template_string,
        format_instructions=format_instructions,
        desired_education=desired_education,
        desired_experience=desired_experience,
        desired_skills=desired_skills,
        job_description=job_description,
    )
    print(cb)

3
2
1
0
Tokens Used: 9140
	Prompt Tokens: 8081
	Completion Tokens: 1059
Successful Requests: 4
Total Cost (USD): $0.0142395


In [47]:
result_df

Unnamed: 0,file,name,contact,experience,skills,education,score,justification
0,Ann_Marquez_resumeExecAsstDEC23 (1).pdf,Ann L. Marquez,annmarquez106@gmail.com,Bilingual administrative professional with ove...,"Travel & Calendar Management, Administrative S...",B.A. - Spanish Translation /Minor - Communicat...,7,Ann L. Marquez has relevant experience in admi...
1,Leyla_Beydoun_Resume__1_.pdf,Leyla Beydoun,leylabey@gmail.com,Associate Recruiter at Creative Marketing + Re...,"Recruitment marketing, digital marketing, dire...",B.F.A. Theater Performance from The University...,7,Leyla has relevant experience as an Associate ...
2,Resume (1).docx,Rasiel Freija,rfreija1@gmail.com,Business Development Associate at ZRG Partners...,"Marketing, sales, operations, business develop...",Bachelor of Science in Criminal Justice,7,Rasiel has relevant experience as a Business D...
3,Resume (1).pdf,Kevin J. Grimes,kjgrimes@alaska.edu,University of Alaska Fairbanks Office of Admis...,"Data entry, communication, content creation, s...","University of Alaska Fairbanks, Occupational E...",7,Kevin has relevant experience in communication...


In [23]:
# Write the results table to a CSV file in the working directory.
result_df.to_csv("test_output.csv")

In [50]:
print(result_df)

                                      file             name  \
0  Ann_Marquez_resumeExecAsstDEC23 (1).pdf   Ann L. Marquez   
1             Leyla_Beydoun_Resume__1_.pdf    Leyla Beydoun   
2                          Resume (1).docx    Rasiel Freija   
3                           Resume (1).pdf  Kevin J. Grimes   

                   contact                                         experience  \
0  annmarquez106@gmail.com  Bilingual administrative professional with ove...   
1       leylabey@gmail.com  Associate Recruiter at Creative Marketing + Re...   
2       rfreija1@gmail.com  Business Development Associate at ZRG Partners...   
3      kjgrimes@alaska.edu  University of Alaska Fairbanks Office of Admis...   

                                              skills  \
0  Travel & Calendar Management, Administrative S...   
1  Recruitment marketing, digital marketing, dire...   
2  Marketing, sales, operations, business develop...   
3  Data entry, communication, content creation, s...  