# Stage 0: Load the Libraries

In [3]:
import pandas
import numpy
from pypdf import PdfReader
import boto3
from langchain_aws import BedrockLLM
from langchain_community.chat_models import BedrockChat


# Stage 1: Load the Data

In [4]:
# Load the Resume 
RESUME_FILE_PATH = "resume_extraction/data/test_resume.pdf"

In [5]:
# Convert PDF Resume to Text

resume_reader = PdfReader(RESUME_FILE_PATH)
resume_text = ""

for page in resume_reader.pages:
    resume_text += page.extract_text()


resume_text


Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 19 0 (offset 0)
Ignoring wrong pointing object 21 0 (offset 0)
Ignoring wrong pointing object 23 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 28 0 (offset 0)


' Ng Jia Yin Email: ng.jiayin@outlook.com | Mobile: 90501881 linkedin.com/in/jia-yin-ng/ | github.com/ngjiayin  EDUCATION National University of Singapore (NUS) Bachelor of Science (Honours) in Data Science and Analytics Aug 2021 – Present • Expected Date of Graduation: Dec 2025 • Relevant Coursework: Data Visualisation, Decision Trees for Machine Learning, Artificial Intelligence: Technology and Impact, Data Structures and Algorithms, Mathematical Statistics, Multivariable Calculus  WORK EXPERIENCE Johnson & Johnson, Analytics Intern Jan 2024 – May 2024 • Managed end-to-end financial forecasting for the Long Range Financial Plan (LRFP) spanning from 2024 to 2028, managing sales figures totalling more than $4 billion. • Implemented data validation and cleaning processes using Microsoft Excel, resulting in a 15% reduction in data errors. • Utilised design thinking methodologies to develop a 90-second brand launch video for an in-house data analytics platform, enhancing brand engagement 

# Stage 2: Intialise AWS Bedrock

In [7]:
# Initialise bedrock client

llm = BedrockLLM(
    model_id="mistral.mixtral-8x7b-instruct-v0:1",
    model_kwargs={"temperature": 0.3, "max_tokens":4076, "top_p":0.1,"top_k":50}
)




In [8]:
# Invoke the llm
response = llm.invoke("Hello! How are you today?")
print(response)

 I hope you’re having a great week so far. I’m excited to share a new recipe with you today! I’ve been making this recipe for years and it’s one of my favorites. It’s a simple, healthy, and delicious recipe that’s perfect for breakfast, lunch, or dinner.

This recipe is for a quinoa and black bean salad. It’s made with cooked quinoa, black beans, corn, cherry tomatoes, red onion, avocado, lime juice, olive oil, salt, and pepper. It’s a hearty and filling salad that’s packed with protein, fiber, and healthy fats.

To make the salad, start by cooking the quinoa according to the package instructions. Once the quinoa is cooked, let it cool to room temperature.

Next, drain and rinse the black beans and corn. Cut the cherry tomatoes in half and dice the red onion.

In a large bowl, combine the cooked quinoa, black beans, corn, cherry tomatoes, red onion, and avocado.

In a small bowl, whisk together the lime juice, olive oil, salt, and pepper. Pour the dressing over the salad and toss to co

# Stage 3: Prompt Design & Fine-Tuning & Data Models

In [88]:
# Extract the skills from the resume
# Competencies are divided into: Core, Functional, Technical, and Leadership Skills




hard_skill_entity_extraction_prompt = """
You are a career counselor. Your task is to extract skill entities from the given text, which can be a resume or a job description.

Skill Entities:
Hard Skills: Also called technical skills, these are job-specific and relevant to each position and seniority level. In other words, each position in every company will require unique hard skills.

TASK:
1. Perform a Part-of-Speech (POS) Tagging on the text.
2. Using the POS-tagged resume, perform Name Entity Recognition to identify all explicitly stated skill entities.
3. For each skill, provide an explanation of skill demonstration using context from the text and the context's contribution to the skill.
4. State the context as it appears in the resume, without extrapolating it with other experiences.

----------------------
Format Instruction:
{format_instructions}
----------------------

----------------------
Text: {text}
----------------------
"""

soft_skill_entity_extraction_prompt = """
You are a career counselor. 
Your task is to extract skill entities from the given text, text can be a resume or a Job Description.

 ------------------------------- 
 Skill Entities: Soft Skills: the term ‘soft skills’ refers to a broad set of skills, behaviors, attitudes and personal qualities that allow people to adapt effectively to their environment, to work well with others, to perform well, and to achieve their goals.
------------------------------- 

TASK: 
1. Perform a Part-of-Speech(POS) Tagging on the text. 
2. Using the POS-tagged resume, perform Name Entity Recognition to identify explicit skills. 
3. Do not assume skills, only extract skills given in the text. 
4. For each skill provide a justification of skill demonstration in resume. For Skill: Python Example: "User Demonstrated Python skill by developing a web application using Django framework."

Only return the JSON. 
------------------------------- 
Format Instruction: 
{format_instructions}
------------------------------- 

------------------------------- 
text: {text}
-------------------------------
"""

modify_skills_entity_extraction_prompt = """

TASK: 
1. Understand User Modified Skill Name,Modification justification and the modification action user wants to perform.
2. For add or modify Action , check if the skill_name already exists, if it does use the explanation to add to the current skill description.
2. For delete action, use the explanation to determine if the whole skills needs to be deleted or just parts of the current justification.

Only return the affected skill_name and skill_justification. 
If the whole skill is deleted , return an empty justification.
If the user input justification is not enough or enough infromation about the action is not provided, simply return an empty skill_name.

Only return the JSON. 
------------------------------- 
Format Instruction: 
{format_instructions}
------------------------------- 

------------------------------- 
user input skill: {skill_name}
user justification: {user_justification}
action: {action}
All Skills: {all_skills}
-------------------------------
"""



fix_format_instruction = """
--------------
{instructions}
--------------
Completion:
--------------
{completion}
--------------
Above, the Completion did not satisfy the constraints given in the Instructions.
Error:
--------------
{error}
--------------
Please try again. Please only respond with an answer that satisfies the constraints laid out in the Instructions.
Important:  Only correct the structural issues within the JSON format. Do not modify the existing data values themselves:
"""


In [76]:
from typing import List
from pydantic import BaseModel, Field

class Competency(BaseModel):
    competency_type: str = Field(..., description="Type of competency:Functional, Technical")
    competency_name: str = Field(..., description="Name of the competency")
    competency_description: str = Field(..., description="Description of how the client has demonstrated the competency in their resume")

class Comptencies(BaseModel):
    competencies: List[Competency] = Field(..., description="List of competencies extracted from the resume")

class Skill(BaseModel):
    skill: str = Field(..., description="Name of the skill")
    # skill_level_justification : str = Field(..., description="Justification of the skill level")    
    skill_level: str = Field(..., description="Level of the skill", enum=["Beginner", "Intermediate", "Expert"])
    # skill_justification: str = Field(..., description="Justification of the extracted skill and level")# 

class Skills(BaseModel):
    skills: List[Skill] = Field(..., description="List of skills and their corresponding skill level extracted from the resume")
    
class SoftSkill(BaseModel):
    skill: str = Field(..., description="Name of the skill")
    skill_justification: str = Field(..., description="explanation of skill demonstration using context from the text and the context's contribution to the skill")#

class SoftSkills(BaseModel):
    soft_skills: List[SoftSkill] = Field(..., description="List of soft skills ")

class HardSkill(BaseModel):
    skill: str = Field(..., description="Name of the skill")
    skill_explanation: str = Field(..., description="explanation of skill demonstration using context from the text and the context's contribution to the skill")#

class HardSkills(BaseModel):
    hard_skills: List[HardSkill] = Field(..., description="List of hard skills ")


# Stage 4: Extract Comptencies from the Resume

In [89]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
from langchain.chains import TransformChain

def fix_chain_fun(inputs):    
    fix_prompt = PromptTemplate.from_template(fix_format_instruction)
    fix_prompt_str = fix_prompt.invoke({'instructions':inputs['instructions'],
                                        'completion':inputs['completion'],
                                        'error':inputs['error']}).text
    
    #print(fix_prompt_str)
    
    completion = llm.invoke(fix_prompt_str)

    # return {"completion": completion}
    
    return {"completion": completion}

fix_chain = TransformChain(
    input_variables = ["instructions", "completion", "error"],output_variables=["completion"], transform=fix_chain_fun
)


def get_hard_skills(resume_text: str) ->  HardSkills:
    '''Takes in User's Resume Text, extracts hard skills form resume , return a JSON of all extracted skills and their explanations using context from Resume.
    
    Args:
        resume_text: Resume String for skill extraction.
    '''
    # Invoke the LLM
    parser = PydanticOutputParser(pydantic_object= HardSkills)
    
    fix_parser = OutputFixingParser(
        parser=parser,
        retry_chain=fix_chain,
        max_retries=2
    )

    prompt = PromptTemplate(
        template = hard_skill_entity_extraction_prompt, 
        input_variables=["text"],
        partial_variables= {"format_instructions": parser.get_format_instructions()})
    
    prompt_str = prompt.format(text=resume_text)

    print(prompt_str)
    
    response = llm.invoke(prompt_str)


    print(f"Response is : {response}")

    fixed_response = fix_parser.invoke(response).dict()

    return fixed_response

def get_soft_skills(resume_text: str) ->  SoftSkills:
    # Invoke the LLM
    parser = PydanticOutputParser(pydantic_object= SoftSkills)
    
    fix_parser = OutputFixingParser(
        parser=parser,
        retry_chain=fix_chain,
        max_retries=2
    )

    prompt = PromptTemplate(
        template = soft_skill_entity_extraction_prompt, 
        input_variables=["text"],
        partial_variables= {"format_instructions": parser.get_format_instructions()})
    
    prompt_str = prompt.format(text=resume_text)

    print(prompt_str)
    
    response = llm.invoke(prompt_str)


    print(f"Response is : {response}")

    fixed_response = fix_parser.invoke(response).dict()

    return fixed_response



In [26]:
# extracted_skills = extract_skills(resume_text)
extract_hard_skills = extract_hard_skills(resume_text)

extract_soft_skills = extract_soft_skills(resume_text)


You are a career counselor. Your task is to extract skill entities from the given text, which can be a resume or a job description.

Skill Entities:
Hard Skills: Also called technical skills, these are job-specific and relevant to each position and seniority level. In other words, each position in every company will require unique hard skills.

TASK:
1. Perform a Part-of-Speech (POS) Tagging on the text.
2. Using the POS-tagged resume, perform Name Entity Recognition to identify all explicitly stated skill entities.
3. For each skill, provide an explanation of skill demonstration using context from the text and the context's contribution to the skill.
4. State the context as it appears in the resume, without extrapolating it with other experiences.

----------------------
Format Instruction:
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings",

In [27]:
# Save the extracted competencies to a JSON file

import json
import datetime

with open(f"candidate_soft_extracted.json", "w") as f:
    json.dump(extract_soft_skills, f, indent=4) 

with open(f"candidate_hard_extracted.json", "w") as f:
    json.dump(extract_hard_skills, f, indent=4) 



# Skills Extraction Agent

In [9]:
from langchain_core.messages import HumanMessage
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.prebuilt import create_react_agent
from langchain_aws import ChatBedrock
from langchain_core.tools import tool

## Defining Global Variables

In [10]:
SKILLS = {}

## Creating Tools

In [11]:
# A tool consists of name of the tool, Description of the tool , A JSON schema defining the inputs to the tool and A function
@tool(parse_docstring=True)
def get_hard_skills(resume_text: str) ->  str:
    """Takes in User's Resume Text, extracts hard skills form resume , return a JSON of all extracted skills and their explanations using context from Resume.
    
    Args:
        resume_text: Resume String for skill extraction.
    """
    # Invoke the LLM
    parser = PydanticOutputParser(pydantic_object= HardSkills)
    
    fix_parser = OutputFixingParser(
        parser=parser,
        retry_chain=fix_chain,
        max_retries=2
    )

    prompt = PromptTemplate(
        template = hard_skill_entity_extraction_prompt, 
        input_variables=["text"],
        partial_variables= {"format_instructions": parser.get_format_instructions()})
    
    prompt_str = prompt.format(text=resume_text)

    print(prompt_str)
    
    response = llm.invoke(prompt_str)


    print(f"Response is : {response}")

    fixed_response = fix_parser.invoke(response).dict()

    for skill_set in fixed_response["hard_skills"]:
        
        key = skill_set["skill"].lower()
        justification = skill_set["skill_explanation"]

        SKILLS[key] = justification

    return str(fixed_response) + "Skills were extracted successfully!"


@tool(parse_docstring=True)
def modify_skills(skill_name:str, user_explanation:str, action:str) ->  HardSkills:
    """Responsible for Adding, Modifying or Deleting skills from skill set extracted from the resume, only when explicitly asked by the user.Takes in a skill_name , user explanation for the action to be done on the skill and action.Actions can be add, delete or modify. 
    
    Args:
        skill_name: Name of the skill modification needs to be done on
        user_explanation: Explanation of the modification
        action: add or delete
    """
    
    # Invoke the LLM
    parser = PydanticOutputParser(pydantic_object= HardSkill)
    
    fix_parser = OutputFixingParser(
        parser=parser,
        retry_chain=fix_chain,
        max_retries=2
    )

    prompt = PromptTemplate(
        template = modify_skills_entity_extraction_prompt, 
        input_variables=["skill_name","user_justification", "all_skills"],
        partial_variables= {"format_instructions": parser.get_format_instructions()})
    
    prompt_str = prompt.format(skill_name=skill_name, user_justification=user_explanation, all_skills = SKILLS, action = action)

    print(prompt_str)
    
    response = llm.invoke(prompt_str)


    print(f"Response is : {response}")

    fixed_response = fix_parser.invoke(response).dict()


    if fixed_response["skill"] and fixed_response["skill_explanation"]:
        key = fixed_response["skill"].lower()
        justification = fixed_response["skill_explanation"]
        if key not in SKILLS:
            SKILLS[key] = justification
            return f"Skill: {skill_name} was Added Sucessfully."
        else:
            SKILLS[key] = justification
            return f"Skill: {skill_name} was Modified Sucessfully."
    if fixed_response["skill"] and not fixed_response["skill_explanation"]:
        key = fixed_response["skill"].lower()
        del SKILLS[key]
        return f"Skill: {skill_name} was Deleted Sucessfully"
    ß
        

    return "No Action was performed, please elaborate further."

get_hard_skills.args_schema.schema()
modify_skills.args_schema.schema()

NameError: name 'HardSkills' is not defined

## Creating Agent

In [114]:
memory = SqliteSaver.from_conn_string(":memory:")
model = ChatBedrock(model_id ="anthropic.claude-3-haiku-20240307-v1:0", model_kwargs={"temperature": 0, "max_tokens":6000, "top_p":0.1,"top_k":50})
tools = [get_hard_skills, modify_skills]
agent_executor = create_react_agent(model, tools, checkpointer=memory)


In [115]:
print(resume_text)

ANSHUMAAN TYAGI
HP: +65 80169372      Email: e0866249@u.nus.edu
EDUCATION
National University of Singapore Aug 2021 - May 2025
Major: Bachelor of Computing in Computer Science(Honours)
Second Major: Data Science.
Specialisation Track: Software Engineering and Artificial Intelligence.
GPA: 4.31/5.
 
TECHNICAL SKILLS
Proficient languages: Python, Java ,SQL,  Javascript, HTML and CSS
Proficient frameworks/libraries: Pytorch, Numpy, Django, PostgreSQL, React & React Native,Node.js, 
Sklearn, Firebase, Excel
Knowledge of: MongoDB, LINUX/UNIX administration , R, Azure Open AI, GPT prompting, 
embeddings, Spacy
 
WORK EXPERIENCE
AI Developer, SAP, Singapore Jan 2023 - Present
Integrating GPT powered by Azure Open AI endpoints in the current pipeline to perform parsing of 
PDF documents.
Improving GPT response efficiency by 5 percent and data security.
Utilized different prompting techniques such as RAG and chain of thought to minimize AI 
hallucinations.
Competed regular bimonthly s

In [116]:
q1 = f"Hey, Extract my skills, my resume is: {resume_text}"
config = {"configurable": {"thread_id": "abc123"}}


for chunk in agent_executor.stream(
{"messages": [HumanMessage(content=q1)]}, config
):
    print(chunk)
    print("----")





{'agent': {'messages': [AIMessage(content='', additional_kwargs={'usage': {'prompt_tokens': 1496, 'completion_tokens': 942, 'total_tokens': 2438}, 'stop_reason': 'tool_use', 'model_id': 'anthropic.claude-3-haiku-20240307-v1:0'}, response_metadata={'usage': {'prompt_tokens': 1496, 'completion_tokens': 942, 'total_tokens': 2438}, 'stop_reason': 'tool_use', 'model_id': 'anthropic.claude-3-haiku-20240307-v1:0'}, id='run-000bf4fd-7849-4de0-946c-46417ea099b1-0', tool_calls=[{'name': 'get_hard_skills', 'args': {'resume_text': "ANSHUMAAN TYAGI\nHP: +65 80169372      Email: e0866249@u.nus.edu\nEDUCATION\nNational University of Singapore Aug 2021 - May 2025\nMajor: Bachelor of Computing in Computer Science(Honours)\nSecond Major: Data Science.\nSpecialisation Track: Software Engineering and Artificial Intelligence.\nGPA: 4.31/5.\n \nTECHNICAL SKILLS\nProficient languages: Python, Java ,SQL,  Javascript, HTML and CSS\nProficient frameworks/libraries: Pytorch, Numpy, Django, PostgreSQL, React & Re

In [107]:
print(SKILLS)

{'python': 'Used Python to design server applications and client interfaces in the role of Software Engineer(Backend Developer) at NUS CommIT.', 'java': 'Not explicitly demonstrated in the text.', 'sql': "Studied industrial application of SQL database use cases and it's deployment in the role of Full Stack Developer at ST Engineering. Utilized SQL database in the role of Software Engineer(Backend Developer) at NUS CommIT.", 'javascript': 'Handled development and management of frontend user interfaces with the help of HTML, CSS, and Javascript in the role of Software Engineer(Backend Developer) at NUS CommIT.', 'html': 'Handled development and management of frontend user interfaces with the help of HTML, CSS, and Javascript in the role of Software Engineer(Backend Developer) at NUS CommIT.', 'css': 'Handled development and management of frontend user interfaces with the help of HTML, CSS, and Javascript in the role of Software Engineer(Backend Developer) at NUS CommIT.', 'pytorch': 'Not

In [117]:
q2 = "Can you elt me know how I used GPT models?"

for chunk in agent_executor.stream(
{"messages": [HumanMessage(content=q2)]}, config
):
    print(chunk)
    print("----")




{'agent': {'messages': [AIMessage(content='', additional_kwargs={'usage': {'prompt_tokens': 3360, 'completion_tokens': 144, 'total_tokens': 3504}, 'stop_reason': 'tool_use', 'model_id': 'anthropic.claude-3-haiku-20240307-v1:0'}, response_metadata={'usage': {'prompt_tokens': 3360, 'completion_tokens': 144, 'total_tokens': 3504}, 'stop_reason': 'tool_use', 'model_id': 'anthropic.claude-3-haiku-20240307-v1:0'}, id='run-73c02dc9-8d50-4430-9435-4c3c76295f07-0', tool_calls=[{'name': 'modify_skills', 'args': {'skill_name': 'GPT prompting', 'user_explanation': 'The resume mentions that I utilized different prompting techniques such as RAG and chain of thought to minimize AI hallucinations in my role as an AI Developer at SAP. This shows that I have experience working with and fine-tuning GPT models.', 'action': 'modify'}, 'id': 'toolu_bdrk_014vtZ58sFuzZrkLwujGkARw', 'type': 'tool_call'}], usage_metadata={'input_tokens': 3360, 'output_tokens': 144, 'total_tokens': 3504})]}}
----


TASK: 
1. Und

In [109]:
SKILLS

{'python': 'Used Python to design server applications and client interfaces in the role of Software Engineer(Backend Developer) at NUS CommIT.',
 'java': 'Not explicitly demonstrated in the text.',
 'sql': "Studied industrial application of SQL database use cases and it's deployment in the role of Full Stack Developer at ST Engineering. Utilized SQL database in the role of Software Engineer(Backend Developer) at NUS CommIT.",
 'javascript': 'Handled development and management of frontend user interfaces with the help of HTML, CSS, and Javascript in the role of Software Engineer(Backend Developer) at NUS CommIT.',
 'html': 'Handled development and management of frontend user interfaces with the help of HTML, CSS, and Javascript in the role of Software Engineer(Backend Developer) at NUS CommIT.',
 'css': 'Handled development and management of frontend user interfaces with the help of HTML, CSS, and Javascript in the role of Software Engineer(Backend Developer) at NUS CommIT.',
 'pytorch'

In [110]:
# Save the extracted competencies to a JSON file

import json

with open(f"candidate_skills_extracted.json", "w") as f:
    json.dump(SKILLS, f, indent=4)
    

# Stage 6: Streamlit Application

In [112]:

from langchain_community.callbacks.streamlit import (
    StreamlitCallbackHandler,
)
import streamlit as st

st_callback = StreamlitCallbackHandler(st.container())

In [113]:
if prompt := st.chat_input():
    st.chat_message("user").write(prompt)
    with st.chat_message("assistant"):
        st_callback = StreamlitCallbackHandler(st.container())
        response = agent_executor.invoke(
            {"input": prompt}, {"callbacks": [st_callback]}
        )
        st.write(response["output"])

2024-07-24 22:09:34.772 
  command:

    streamlit run /Users/anshumaantyagi/Desktop/CourseRec/env/lib/python3.9/site-packages/ipykernel_launcher.py [ARGUMENTS]
