In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules before each
# cell executes, so edits to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
from datetime import datetime
from IPython.display import display, Markdown
from pathvalidate import sanitize_filename

import utils

from prompts import Job_Post, Resume_Builder

In [3]:
## Inputs
my_files_dir = "my_applications"  # location for all job and resume files
job_file = "job.txt"  # filename with job post text. The entire job post can be pasted in this file, as is.
raw_resume_file = "resume_raw.yaml"  # filename for raw resume yaml. See example in repo for instructions.

# Model for matching resume to job post (except extraction chains)
# gpt-4 is not publicly available yet and can be 20-30 times costlier than the default model gpt-3.5-turbo.
# Simple extraction chains are hardcoded to use the cheaper gpt-3.5 model.
openai_model_name = "gpt-4"
# openai_model_name = "gpt-3.5-turbo"

# temperature lower than 1 is preferred. temperature=0 returns deterministic output
temperature = 0.5

# Keyword arguments handed to Resume_Builder via `llm_kwargs=`.
# `temperature` is included here so that the setting above actually takes effect;
# previously it was defined but never passed anywhere.
# NOTE(review): assumes llm_kwargs is forwarded to the chat model constructor,
# which accepts a `temperature` keyword - confirm in prompts.py.
llm_kwargs = dict(
    model_name=openai_model_name,
    temperature=temperature,
    model_kwargs=dict(top_p=0.6, frequency_penalty=0.1),
)

In [4]:
# Step 1 - Read the job posting text, parse it, and save the parsed result as a .job file
job_text = utils.read_jobfile(os.path.join(my_files_dir, job_file))
job_post = Job_Post(job_text)
parsed_job = job_post.parse_job_post(verbose=False)

# The output base name is built from today's date plus the parsed company and job title.
company_name, job_title = parsed_job["company"], parsed_job["job_title"]
today_date = datetime.today().strftime("%Y%m%d")
job_basename = sanitize_filename(f"{today_date}__{company_name}__{job_title}")
job_filename = os.path.join(my_files_dir, job_basename)

print(f"#filename: {job_filename}.job\n")
utils.write_yaml(parsed_job, filename=f"{job_filename}.job")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a world class algorithm for extracting information in structured formats.
Human: Use the given format to extract information from the following input:
Human: Varo represents a new generation of fintech built on technology and innovation to empower consumers. We serve millions of Americans with our mobile app to make digital banking easy, convenient, and personalized.

We believe the future of lending in fintech lies with Machine Learning. Be one of the first scientists in the world to innovate models that dynamically set credit limits, decide loan eligibility, and predict customer borrowing behavior. You’ll lead Machine Learning in lending at Varo, working on predictive insights and underwriting models that enable broader, more inclusive, and affordable access to credit for customers.

The Lead Machine Learning Scientist in Lending will be responsible for developing the machine learning mod

In [5]:
# Step 2 - Load the raw resume yaml and create the Resume_Builder object for the parsed job
raw_resume_path = os.path.join(my_files_dir, raw_resume_file)
my_resume = Resume_Builder(
    resume=utils.read_yaml(filename=raw_resume_path),
    parsed_job=parsed_job,
    llm_kwargs=llm_kwargs,
)

# Start from the raw sections; later cells rebind these names.
projects, experiences, skills = (
    my_resume.projects_raw,
    my_resume.experiences_raw,
    my_resume.skills_raw,
)

In [6]:
# Step 3 - Rephrase unedited experiences.
# If answers are missing or hallucinated, try re-running this cell.
experiences = my_resume.rewrite_unedited_experiences(verbose=True)
utils.write_yaml({"experiences": experiences})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are an expert technical writer. Your goal is to strictly follow all the provided <Steps> and meet all the given <Criteria>.
Human: <Instruction> Identify the relevant portions from the <Master Resume> that match the <Job Posting>, rephrase these relevant portions into highlights, and rate the relevance of each highlight to the <Job Posting> on a scale of 1-5.
Human: <Criteria> 
- Each highlight must be based on what is mentioned in the <Master Resume>.
- In each highlight, include how that experience in the <Master Resume> demonstrates an ability to perform duties mentioned in the <Job Posting>.
- In each highlight, try to include action verbs, give tangible and concrete examples, and include success metrics when available.
- Grammar, spellings, and sentence structure must be correct.
Human: <Steps>
- Create a <Plan> for following the <Instruction> while meeting all the <Criteria>.
- What <Addi

In [None]:
# Review the generated output in previous cell.
# If any updates are needed, copy the cell output below between the triple quotes.
# Set edits = """ """ (blank) if no edits are needed.
# The literal is a raw string so that backslash escapes meant for the yaml/pdf step
# (e.g. \$) are kept verbatim without Python treating them as invalid escape sequences.
edits = r"""experiences:
- company: Marqeta, Oakland CA
  titles:
  - name: Staff Machine Learning Researcher
    startdate: 2022
    enddate: current
  highlights:
  - Demonstrated ability to lead machine learning projects, as evidenced by successfully
    leading a fraud detection project at Marqeta, demonstrating both technical expertise
    and leadership skills
  - Strong familiarity with AWS Sagemaker and Pyspark, as demonstrated by setting
    up AWS Sagemaker Studio in VPC and writing
    Pyspark ML financial model evaluation library.
  - Demonstrated ability to design and implement ML infrastructure, as shown by the
    creation of a comprehensive design doc
  - Proven experience in handling large datasets, as shown by launching a project
    involving billions of transactions with over 300 features
  - Experience in introducing and discussing MLOps concepts with the ML team, indicating
    a strong understanding of current practices in the field
  - Proven ability to communicate and share ML best practices with leadership and
    executive teams, demonstrating strong communication and leadership skills
  - Led the machine learning project for risk and fraud detection in card transactions, 
    demonstrating expertise in end-to-end ML development including data processing, feature
    engineering, model training, and evaluation, and showcasing ability to work closely with 
    Product Managers, Engineers, and Researchers.
  - Launched a large project to collect training data for billions of transactions
    and over 1000 features, and built a pipeline to share data with stakeholders while
    ensuring security and privacy compliance, showcasing the ability to work
    in a fast-paced, dynamic environment and practical experience in data analysis,
    statistics, and experimentation.
  - Built a thorough evaluation methodology for vendors' supervised and unsupervised
    fraud models. Successfully evaluated
    vendors' fraud models and communicated the results to leadership, demonstrating strong 
    problem-solving skills and the ability
    to foster data-driven business decisions.
  - Successfully set up AWS Sagemaker Studio and Glue in VPC to enable ML engineers
    to use Pyspark, highlighting experience with development of AI models on managed cloud services. Wrote 
    Pyspark ML financial model evaluation library.
  - Introduced MLOps best practices to the ML team, promoting efficient collaboration
    during model development, and showcasing experience in collaborating and influencing technical teams.
  - Demonstrated leadership skills by working independently across challenging stakeholders,
    fostering seamless collaboration and achieving successful outcomes.
- company: One Concern Inc., Menlo Park CA
  titles:
  - name: Director of Research
    startdate: 2020
    enddate: 2022
  - name: Lead Machine Learning Engineer in Resilience team
    startdate: 2017
    enddate: 2020
  - name: Machine Learning Researcher
    startdate: 2015
    enddate: 2017
  highlights:
  - Developed and validated physics + machine learning based natural hazard models,
    building trust for existing and potential clients, and ensuring highest safety
    standards for ML models.
  - Led a research team to develop statistical epidemiological models for COVID-19
    Calculator, resulting in a peer-reviewed journal paper publication, and
    highlighting the ability to analyze
    large-scale time series data, suggest innovative solutions and extend progress and trends from
    academia to real-world problems.
  - Successfully patented a probabilistic machine learning model for estimating infrastructure
    impacts after an earthquake, showcasing the ability to deliver high level of technical
    quality and drive high impact projects.
  - Mentored data scientists in developing multi-stage machine learning and computer
    vision models that successfully identified soft-story buildings using Google StreetView
    images, providing technical leadership and mentoring. Presented the project findings
    at a seismic conference.
  - Served as the technical expert for business development, with frequent international
    travel, and played a crucial role in closing a \$90 million deal, demonstrating
    the ability to communicate complex technical ideas to both technical and non-technical 
    stakeholders with clarity and precision.
  - Managed collaborations
    with prestigious universities and research institutions, resulting in the development
    of cutting-edge technologies in network analysis and disaster impacts.
"""

edits = edits.strip()
if edits:
    edits1 = utils.read_yaml(edits)
    # Rebind only the notebook-level variables whose section appears in the pasted yaml.
    # globals() is used so that names not defined yet (e.g. summary) are never read.
    for section_name in ("experiences", "projects", "skills", "summary"):
        if section_name in edits1:
            globals()[section_name] = edits1[section_name]

In [None]:
# Step 4 - Rephrase projects
# Hand the (possibly edited) experiences to the builder before rewriting the projects section.
my_resume.experiences = experiences

projects = my_resume.rewrite_section(
    section=my_resume.projects_raw,
    verbose=False,
)
utils.write_yaml({"projects": projects})

In [None]:
# Review the generated output in previous cell.
# If any updates are needed, copy the cell output below between the triple quotes.
# Set edits = """ """ (blank) if no edits are needed.
edits = """projects:
- Published an open source project using prompt engineering with large
  language models (LLMs) to
  improve resume personalization while ensuring transparency and
  trustworthiness, and showcasing expertise in latest generative AI research and implementation.
- Prototyped an open source project utilizing self hosted transformer bi-modal embeddings models
  for image search, while working with unpublished APIs and ensuring data
  privacy, showcasing proficiency in software engineering and building high-quality technical solutions.
- Applied advanced geospatial statistical techniques to leverage large-scale LiDAR data for
  calculating city-scale building heights, demonstrating the ability to develop scalable solutions
  for high-impact applications.
"""

edits = edits.strip()
if edits:
    edits2 = utils.read_yaml(edits)
    # Rebind only the notebook-level variables whose section appears in the pasted yaml.
    # globals() is used so that names not defined yet (e.g. summary) are never read.
    for section_name in ("experiences", "projects", "skills", "summary"):
        if section_name in edits2:
            globals()[section_name] = edits2[section_name]

In [None]:
# Step 5 - Extract skills
# Matches the required skills from the job post against your resume sections and
# outputs a combined list of skills extracted from the job post and included in the raw resume.
my_resume.experiences = experiences
my_resume.projects = projects

skills = my_resume.extract_matched_skills(verbose=False)
utils.write_yaml({"skills": skills})

In [None]:
# Review the generated output in previous cell.
# If any updates are needed, copy the cell output below between the triple quotes.
# Set edits = """ """ (blank) if no edits are needed.
edits = """skills:
- category: Technical
  skills:
  - Python
  - Pandas
  - Numpy
  - Sklearn
  - PyTorch
  - AWS Sagemaker
  - MLOps
  - Docker
  - Langchain
  - Sentence-transformers
  - Statistical modeling
  - Matplotlib
  - Pyspark
  - SQL
- category: Non-technical
  skills:
  - Communication
  - Proactiveness
  - Creativity
  - Prioritization
  - Leadership
  - Agile software development
  - Problem solving
"""

edits = edits.strip()
if edits:
    edits3 = utils.read_yaml(edits)
    # Rebind only the notebook-level variables whose section appears in the pasted yaml.
    # globals() is used so that names not defined yet (e.g. summary) are never read.
    for section_name in ("experiences", "projects", "skills", "summary"):
        if section_name in edits3:
            globals()[section_name] = edits3[section_name]

In [None]:
# Step 6 - Create a resume summary
# Sync the reviewed sections onto the builder before asking it for a summary.
my_resume.experiences = experiences
my_resume.skills = skills
my_resume.projects = projects

summary = my_resume.write_summary(verbose=True)
utils.write_yaml({"summary": summary})

In [None]:
# Review the generated output in previous cell.
# If any updates are needed, copy the cell output below between the triple quotes.
# Set edits = """ """ (blank) if no edits are needed.
edits = """summary: Results-driven ML and AI researcher with a Ph.D. minor in Artificial Intelligence.
  Demonstrated expertise in end-to-end machine
  learning development, including data processing, feature engineering, model training,
  and evaluation.
  Proven track record in leading ML projects in risk forecasting and fraud detection, 
  and working with large datasets while ensuring data privacy and security compliance.
  Published
  open source projects utilizing large language models (LLMs) and multimodal embeddings,
  ensuring transparency, trustworthiness, and data privacy.
  Proficient in Python, SQL, and AWS Sagemaker, and Pyspark. Strong communication and leadership skills with a demonstrated
  ability to communicate complex technical ideas with clarity and precision. 
"""

edits = edits.strip()
if edits:
    edits4 = utils.read_yaml(edits)
    # Rebind only the notebook-level variables whose section appears in the pasted yaml.
    # globals() is used so that names not defined yet are never read.
    for section_name in ("experiences", "projects", "skills", "summary"):
        if section_name in edits4:
            globals()[section_name] = edits4[section_name]

In [None]:
# Step 7 - Generate final resume yaml for review
my_resume.summary = summary
my_resume.experiences = experiences
my_resume.projects = projects
my_resume.skills = skills

# Reuse the base name computed in Step 1 instead of rebuilding it from today's date.
# Step 8 reads both "<resume_filename>.yaml" and "<resume_filename>.job", so the two
# files must share one base name; recomputing the date here would break that pairing
# whenever Step 1 and Step 7 run on different calendar days.
resume_filename = job_filename
resume_final = my_resume.finalize()
print(f"#filename: {resume_filename}.yaml\n")
utils.write_yaml(resume_final, filename=f"{resume_filename}.yaml")

In [None]:
# Step 8 - Identify resume improvements
# A previously generated resume can also be used here by manually providing resume_filename.
# Requires the associated parsed job file (same base name, .job extension).
resume_yaml_path = f"{resume_filename}.yaml"
parsed_job_path = f"{resume_filename}.job"

final_resume = Resume_Builder(
    resume=utils.read_yaml(filename=resume_yaml_path),
    parsed_job=utils.read_yaml(filename=parsed_job_path),
    is_final=True,
    llm_kwargs=llm_kwargs,
)
improvements = final_resume.suggest_improvements(verbose=False)
utils.write_yaml({"improvements": improvements})

In [7]:
# Step 9 - Generate pdf from yaml
# Most common errors during pdf generation occur due to special characters. Escape them with backslashes in the yaml, e.g. $ -> \$
# NOTE: this rebinds resume_filename to a hard-coded base name rather than the one set in Step 7.
resume_filename = os.path.join(my_files_dir, "20231201__Abhineet_Gupta")
pdf_file = utils.generate_pdf(yaml_file=f"{resume_filename}.yaml")

# Show the generated pdf path as a clickable markdown link.
pdf_link = f"[{pdf_file}](<{pdf_file}>)"
display(Markdown(pdf_link))

[my_applications/20231201__Abhineet_Gupta.pdf](<my_applications/20231201__Abhineet_Gupta.pdf>)