# Parsing a PDF with PyMuPDF

In [15]:
import pymupdf
# Open the resume PDF; the resulting document is iterated page by page below.
RESUME_PATH = "resume.pdf"
doc = pymupdf.open(RESUME_PATH)

In [16]:
# Show the document's repr to confirm the file was opened.
doc

Document('resume.pdf')

In [21]:
# Concatenate the extracted text of every page, ending each page with a newline,
# so the whole resume is available as one string for the prompts below.
text = "".join(page.get_text() + "\n" for page in doc)

print(text)

Singapore
hangjoni@gmail.com
+65 97764369
Joni Ngo
linkedin.com/in/hangjoni
github.com/hangjoni
Data Scientist with Sales background. Unique perspective on leveraging data-driven solutions to meet business objectives.
M.Sc. in Machine Learning with proven expertise in developing recommendation systems, data solutions and predictive
models.
Work Experience
Customer Data Scientist
Smartly.io
2018 – 2023
●
Designed and built tailored/scalable data products to support clients in achieving key metrics. Projects include:
Vertical benchmark dashboard:
o
Initiated and led a team to build a dashboard benchmarking marketing metrics for clients against their
relevant verticals and markets. Engineered data mining methods to extract data from multiple sources and
formats which unlock proprietary analysis previously unavailable.
o
> 50% of Sales teams adopted the dashboard in key client renewals engagement. The dashboard was
eventually converted to a fully scaled, revenue generating product helping 

# Agent

In [47]:
import json
import os
from typing import List, Optional

import openai
from dotenv import load_dotenv, find_dotenv
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain.prompts import ChatPromptTemplate
from langchain.tools.render import format_tool_to_openai_function
from langchain.utils.openai_functions import convert_pydantic_to_openai_function
from pydantic import BaseModel, Field
# Load variables from the local .env file so the API key is not hard-coded in
# the notebook. The result is bound to `_` only to keep it out of the cell output.
_ = load_dotenv(find_dotenv()) # read local .env file
# Indexing os.environ raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

In [23]:
# Free-form review chain: a career-advisor system prompt followed by the
# resume text supplied through the `input` template variable.
model = ChatOpenAI()

review_messages = [
    ("system", "You are a career advisor. You have been asked to review a resume and provide feedback and suggestions"),
    ("user", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(review_messages)

# Pipe the rendered prompt straight into the chat model.
chain = prompt | model

In [24]:
# Run the chain on the extracted resume text; `text` fills the `{input}` slot.
result = chain.invoke({"input": text})

In [25]:
# Inspect the raw message object returned by the chain.
result

AIMessage(content='Overall, your resume is quite impressive with a strong focus on data science and machine learning. Here are some suggestions to further enhance your resume:\n\n1. **Summary Section**: Consider expanding your summary to include more details about your career goals, key achievements, and unique skills that set you apart from other candidates. Highlight how your sales background complements your data science expertise.\n\n2. **Work Experience**: Your work experience section is detailed and showcases your hands-on experience in developing data products and solutions. Consider quantifying your achievements with specific metrics or percentages to provide concrete evidence of your impact.\n\n3. **Education**: Your educational background is impressive, especially with your specialization in Machine Learning. Make sure to highlight any relevant coursework or projects that demonstrate your expertise in the field.\n\n4. **Projects**: Your projects section is strong and demonstr

In [26]:
# Print only the message body so the feedback is readable with real line breaks.
print(result.content)

Overall, your resume is quite impressive with a strong focus on data science and machine learning. Here are some suggestions to further enhance your resume:

1. **Summary Section**: Consider expanding your summary to include more details about your career goals, key achievements, and unique skills that set you apart from other candidates. Highlight how your sales background complements your data science expertise.

2. **Work Experience**: Your work experience section is detailed and showcases your hands-on experience in developing data products and solutions. Consider quantifying your achievements with specific metrics or percentages to provide concrete evidence of your impact.

3. **Education**: Your educational background is impressive, especially with your specialization in Machine Learning. Make sure to highlight any relevant coursework or projects that demonstrate your expertise in the field.

4. **Projects**: Your projects section is strong and demonstrates your practical skills 

In [61]:
class WorkExperience(BaseModel):
    """Describe work experiences in a more concise and impactful manner. 
        Make sure the impact states clear quantifiable results and shows the skills and role of the candidate. 
        Make sure the description is stated in how it is relevant for a job as Data Scientist. """
    # NOTE: the docstring above is not only documentation. It is emitted as the
    # `description` of each item in the generated function schema, so it is
    # deliberately phrased as an instruction to the model and left unchanged.

    # Required fields: every extracted job must carry these.
    company: str = Field(..., title="Company Name", description="Name of the company")
    title: str = Field(..., title="Job Title", description="Job title")
    start_date: str = Field(..., title="Start Date", description="Start date of the job")

    # Optional fields: each defaults to None, so each annotation must admit None.
    # `end_date` was previously annotated as plain `str` despite its None default;
    # it is now Optional[str], consistent with `description` and `impacts`.
    end_date: Optional[str] = Field(None, title="End Date", description="End date of the job")
    description: Optional[str] = Field(None, title="Description", description="Description of the job responsibilities")
    impacts: Optional[List[str]] = Field(None, title="Impacts", description="List of key projects and impacts made in the job")


In [62]:
class WorkExperienceList(BaseModel):
    """List of work experiences"""
    # Top-level container that is converted into the function schema; the
    # docstring above becomes that function's `description`, so keep it as is.
    work_experiences: List[WorkExperience] = Field(
        ...,
        title="Work Experiences",
        description="List of work experiences",
    )

In [63]:
# Convert the pydantic model into a function-calling schema (a plain dict with
# `name`, `description` and `parameters`) that can be bound to the chat model.
work_parsing_function = convert_pydantic_to_openai_function(WorkExperienceList)
# Display the generated schema for inspection.
work_parsing_function

{'name': 'WorkExperienceList',
 'description': 'List of work experiences',
 'parameters': {'type': 'object',
  'properties': {'work_experiences': {'description': 'List of work experiences',
    'type': 'array',
    'items': {'description': 'Describe work experiences in a more concise and impactful manner. \nMake sure the impact states clear quantifiable results and shows the skills and role of the candidate. \nMake sure the description is stated in how it is relevant for a job as Data Scientist. ',
     'type': 'object',
     'properties': {'company': {'description': 'Name of the company',
       'type': 'string'},
      'title': {'description': 'Job title', 'type': 'string'},
      'start_date': {'description': 'Start date of the job', 'type': 'string'},
      'end_date': {'description': 'End date of the job', 'type': 'string'},
      'description': {'description': 'Description of the job responsibilities',
       'type': 'string'},
      'impacts': {'description': 'List of key projec

In [64]:
# Extraction step: have the model return the resume's work experiences as
# structured function-call arguments instead of free-form prose.
prompt = ChatPromptTemplate.from_messages([
    ("system", "Read the user input carefully and extract the work experiences from the resume"),
    ("user", "{input}")
])

# Bind the schema and pin `function_call` to it so the model always answers
# by calling WorkExperienceList.
model_with_work = model.bind(
    functions=[work_parsing_function],
    function_call={"name": "WorkExperienceList"}
)

# JsonOutputFunctionsParser yields the full arguments dict, i.e.
# {"work_experiences": [...]}. It takes no `key_name` option (that belongs to
# JsonKeyOutputFunctionsParser), so none is passed here.
chain = prompt | model_with_work | JsonOutputFunctionsParser()

output = chain.invoke({"input": text})

In [65]:
# Inspect the parsed function-call arguments (a dict keyed by "work_experiences").
output

{'work_experiences': [{'company': 'Smartly.io',
   'title': 'Customer Data Scientist',
   'start_date': '2018',
   'end_date': '2023',
   'description': 'Designed and built tailored/scalable data products to support clients in achieving key metrics.',
   'impacts': ['Initiated and led a team to build a dashboard benchmarking marketing metrics for clients against their relevant verticals and markets. Engineered data mining methods to extract data from multiple sources and formats which unlock proprietary analysis previously unavailable.',
    '50% of Sales teams adopted the dashboard in key client renewals engagement. The dashboard was eventually converted to a fully scaled, revenue generating product helping to expand profit margin with clients.'],
   'skills': 'SQL, Data Build Tool, data extraction, Snowflake, Tableau'},
  {'company': 'Smartly.io',
   'title': 'Customer Data Scientist',
   'start_date': '2018',
   'end_date': '2023',
   'description': 'Designed and built tailored/scal

In [55]:
# Rewrite step: ask the model to tighten the extracted work experiences while
# keeping their structure.
prompt = ChatPromptTemplate.from_messages([
    ("system", "Rewrite the following work experiences in a more concise and impactful manner. Make sure the impact states clear quantifiable results and shows the skills and role of the candidate. Make sure the description is stated in how it is relevant for a job as Data Scientist. Keep the format of the work experiences the same."),
    ("user", "{input}")
])

# Plain chat model with no function binding: this step returns ordinary text.
model = ChatOpenAI()

chain = prompt | model

# Serialise the extracted dict to JSON before templating. Passing the dict
# directly would insert its Python repr (single-quoted keys), and because the
# prompt asks the model to keep the format, the reply would not be valid JSON.
# ensure_ascii=False keeps non-ASCII characters from the resume readable.
output_json = json.dumps(output, indent=2, ensure_ascii=False)

output2 = chain.invoke({"input": output_json})

In [57]:
# Print the rewritten work experiences returned by the model.
print(output2.content)

{'work_experiences': [{'company': 'Smartly.io', 'title': 'Customer Data Scientist', 'start_date': '2018', 'end_date': '2023', 'description': 'Designed tailored data products to boost client metrics.', 'impacts': ['Led team to create benchmarking dashboard for clients, improving marketing metrics by 20%.', 'Developed Feature Recommendation system with chatbot integration, achieving 90% acceptance rate.', 'Engineered web app for generating marketing recommendations, saving 15 hours/week for account managers.', 'Built CPA modeling web app using linear regression, resulting in 10% upsell impact during budget planning.'], 'skills': ['SQL', 'Data Build Tool', 'Snowflake', 'Tableau', 'Scikit-learn', 'Pandas', 'API', 'Flask', 'React', 'Docker', 'D3.js']}, {'company': 'Google', 'title': 'Territory Account Manager', 'start_date': '2014', 'end_date': '2017', 'description': 'Grew Vietnam portfolio by +130% through strategic partnerships with media agencies.', 'skills': ['Sales', 'Partnerships', 'E