In [1]:
# Import required libraries with updated imports
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.output_parsers import StrOutputParser

from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

In [2]:
# Select LLama 3B model
model_id = "meta-llama/Llama-3.2-3B-Instruct"
cache_dir = "./hf_models"

# Check device availability - this is for my macbook / you can use cuda if on windows
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using device: {device}")
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    cache_dir=cache_dir
)

Using device: mps


In [3]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device in ["mps", "cuda"] else torch.float32,
    device_map="auto",
    cache_dir=cache_dir
)

text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    temperature=0.1,
    top_p=0.9,
    repetition_penalty=1,
    return_full_text=False,
    clean_up_tokenization_spaces=True
)

llm = HuggingFacePipeline(
    pipeline=text_generation_pipeline,
    model_kwargs={"return_full_text": False} # this line could be deleted
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
job_description = """At [Company X], we rely on insightful data to power our systems and solutions. We’re seeking an experienced data scientist to deliver insights on a daily basis. The ideal candidate will have mathematical and statistical expertise, along with natural curiosity and a creative mind. While mining, interpreting, and cleaning our data, this person will be relied on to ask questions, connect the dots, and uncover hidden opportunities for realizing the data’s full potential. As part of a team of specialists, the data scientist will “slice and dice” data using various methods and create new visions for the future.
Objectives of this role

    Collaborate with product design and engineering teams to develop an understanding of needs
    Research and devise innovative statistical models for data analysis
    Communicate findings to all stakeholders
    Enable smarter business processes by using analytics for meaningful insights
    Keep current with technical and industry developments

Responsibilities

    Serve as lead data strategist to identify and integrate new datasets that can be leveraged through our product capabilities, and work closely with the engineering team in the development of data products
    Execute analytical experiments to help solve problems across various domains and industries
    Identify relevant data sources and sets to mine for client business needs, and collect large structured and unstructured datasets and variables
    Devise and utilize algorithms and models to mine big-data stores; perform data and error analysis to improve models; clean and validate data for uniformity and accuracy
    Analyze data for trends and patterns, and interpret data with clear objectives in mind
    Implement analytical models in production by collaborating with software developers and machine-learning engineers

Required skills and qualifications

    Seven or more years of experience in data science
    Proficiency with data mining, mathematics, and statistical analysis
    Advanced experience in pattern recognition and predictive modeling
    Experience with Excel, PowerPoint, Tableau, SQL, and programming languages (ex: Java/Python, SAS)
    Ability to work effectively in a dynamic, research-oriented group that has several concurrent projects

Preferred skills and qualifications

    Bachelor’s degree (or equivalent) in statistics, applied mathematics, or related discipline
    Two or more years of project management experience
    Professional certification"""

In [8]:
# Few shot prompting
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
from examples import RESUME_EXAMPLES


example_prompt = ChatPromptTemplate.from_messages(
        [
            ("human", "{input}"),
            ("ai", "{output}"),
        ]
    )

few_shot_prompt = FewShotChatMessagePromptTemplate(
        example_prompt=example_prompt,
        examples=RESUME_EXAMPLES,
        input_variables=["input"],
    )



In [9]:
resume_optimizer_prompt = ChatPromptTemplate.from_messages([
    ("human", """
Below is a flawed or unstructured resume, a job description the candidate is applying to, and some helpful context from other successful resumes.

Use the job description and context to emphasize relevant skills and experiences. Your job is to rewrite and optimize the resume in valid JSON format aligned with ATS standards.

--- RAW RESUME ---

[
    {
          "highlights": [
           "I messed around with some Twitter data for a research assistant gig — used tweepy to pull tweets and tried to do some basic sentiment stuff",
           "My advisor wanted to see if we could find anything about public opinion shifts during a product launch",
           "I didn’t know much NLP but figured out how to use NLTK and TextBlob after watching some YouTube videos"
         ]
       }
     ]

--- JOB DESCRIPTION ---
{job_description}

Please generate the optimized JSON resume:
""")
])



full_prompt = few_shot_prompt | resume_optimizer_prompt

resume_optimizer_chain = full_prompt | llm | JsonOutputParser()

optimized_resume_json = resume_optimizer_chain.invoke({"job_description": job_description})

print(optimized_resume_json)

KeyError: "Input to FewShotChatMessagePromptTemplate is missing variables {'input'}.  Expected: ['input'] Received: ['job_description']\nNote: if you intended {input} to be part of the string and not a variable, please escape it with double curly braces like: '{{input}}'.\nFor troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_PROMPT_INPUT "

```json
{
  "$schema": "https://raw.githubusercontent.com/jsonresume/resume-schema/v1.0.0/schema.json",
  "basics": {
    "name": "Richard Hendriks",
    "label": "Data Scientist",
    "image": "",
    "email": "richard.hendriks@mail.com",
    "phone": "(912) 555-4321",
    "url": "http://richardhendricks.example.com",
    "summary": "Results-driven data scientist with expertise in data analysis, machine learning, and statistical modeling. Proven track record of delivering insights that drive business growth.",
    "location": {
      "address": "2712 Broadway St",
      "postalCode": "CA 94115",
      "city": "San Francisco",
      "countryCode": "US",
      "region": "California"
    },
    "profiles": [
      {
        "network": "Twitter",
        "username": "neutralthoughts",
        "url": "https://www.twitter.com"
      },
      {
        "network": "SoundCloud",
        "username": "dandymusicnl",
        "url": "https://soundcloud.example.com/dandym
