In [1]:
import os
import anthropic
import PyPDF2
import json
import pandas as pd


In [2]:
def load_env_from_json(file_path):
    """Loads security keys from a JSON file and sets them as environment variables.

    The file must contain a single JSON object mapping variable names to values.
    String values are stored as-is; other non-null values (numbers, booleans,
    lists, objects) are stored as their JSON text, because ``os.environ`` only
    accepts strings. ``null`` values are skipped.

    Problems are reported with ``print`` rather than raised: a missing file,
    invalid JSON, or a top-level value that is not an object.

    :param file_path: Path to the JSON secrets file
    :return: None
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            secrets = json.load(file)
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found.")
        return
    except json.JSONDecodeError:
        print(f"Error: Could not parse {file_path}. Ensure it is valid JSON.")
        return

    # A JSON array or scalar has no .items(); report it instead of crashing.
    if not isinstance(secrets, dict):
        print(f"Error: {file_path} must contain a JSON object of key/value pairs.")
        return

    for key, value in secrets.items():
        if value is None:
            print(f"Skipped {key}: value is null")
            continue
        # os.environ rejects non-string values with TypeError, so serialise them.
        os.environ[key] = value if isinstance(value, str) else json.dumps(value)
        # Only the key name is printed, never the secret value.
        print(f"Loaded {key} into environment variables")  # Optional, for debugging

In [3]:
# Copy the key/value pairs from the local secrets file into os.environ.
load_env_from_json('secrets.json')

Loaded ANTHROPIC_API_KEY into environment variables


In [4]:
def extract_text_from_pdf(pdf_path):
    """
    Read a PDF and return the text of all its pages as one string.

    Each page's extracted text is followed by a newline; leading and trailing
    whitespace of the combined result is stripped.

    :param pdf_path: Path to the PDF file
    :return: Combined page text
    """
    with open(pdf_path, "rb") as handle:
        # Extraction must finish while the file handle is still open.
        pages = PyPDF2.PdfReader(handle).pages
        combined = "".join(page.extract_text() + "\n" for page in pages)
    return combined.strip()

In [5]:
def keywords_extraction_prompt(model):
    """
    Return the keyword-extraction system prompt for the selected prompt variant.

    :param model: Prompt variant selector; "CLAUDE" and "CHATGPT" are recognised
                  (exact, case-sensitive comparison)
    :return: The matching system prompt, or an empty string for any other value
    """
    if model == "CLAUDE":
        return """
            You are an AI text analysis expert specializing in job market intelligence. Carefully analyze the following job description to extract the MOST SIGNIFICANT keywords that would be valuable for resume tailoring and job market analysis.
            Categorize and extract keywords in these specific areas:
            1. TECHNICAL REQUIREMENTS:
               - Hard skills (specific proficiencies)
               - Technologies/programming languages/frameworks
               - Software/platforms/tools
               - Technical certifications/qualifications        
            2. EXPERIENCE & EXPERTISE:
               - Years of experience
               - Domain expertise
               - Industry-specific knowledge
               - Required credentials/education
                    3. JOB RESPONSIBILITIES:
               - Core functions
               - Key deliverables
               - Project management aspects
               - Performance metrics mentioned
            4. PROFESSIONAL ATTRIBUTES:
               - Soft skills
               - Communication styles
               - Collaboration approaches
               - Leadership requirements
            5. COMPANY & INDUSTRY CONTEXT:
               - Industry terminology
               - Business domain vocabulary
               - Company-specific jargon
               - Market positioning indicators
            For each category, list only the 5-7 most critical terms that would significantly impact application success. Prioritize specialized terms over generic ones. Format your response as comma-separated values within each category.
            """

    if model == "CHATGPT":
        return """
            You are an AI assistant specializing in job description analysis. Your task is to extract **only the most relevant and high-impact keywords** from the given job description.
            
            Focus on keywords that are essential for understanding the job role, filtering out generic terms. The extracted keywords should be valuable for **resume matching, candidate profiling, and industry trend analysis**.
            
            #### **Categories to Extract Keywords From:**
            - **Core Skills & Expertise**: Technical and domain-specific skills required for the role.
            - **Tools, Technologies, & Frameworks**: Programming languages, software, methodologies, and platforms used in the job.
            - **Key Responsibilities**: Actionable tasks, duties, and role expectations.
            - **Soft Skills**: Essential interpersonal and workplace competencies.
            - **Industry-Specific Terminology**: Jargon, compliance requirements, certifications, and any sector-related terminology.
            
            #### **Response Format:**
            Return the extracted keywords as a **concise, comma-separated list**, prioritizing **high-value terms** that best describe the job role.
            """

    # Any other selector yields an empty system prompt.
    return ""

In [6]:
def execute_question(system_prompt, user_message):
    """
    Send one user message with a system prompt to the Anthropic Messages API.

    :param system_prompt: Text passed as the request's system prompt
    :param user_message: Text sent as the single user turn
    :return: The ``content`` attribute of the API response, unmodified
    """
    request = {
        "model": "claude-3-7-sonnet-20250219",
        "max_tokens": 1000,
        "temperature": 0.3,
        "system": system_prompt,
        "messages": [{"role": "user", "content": user_message}],
    }
    # ANTHROPIC_API_KEY is a module-level name resolved at call time.
    api = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
    return api.messages.create(**request).content


In [7]:
def execute_question2(system_prompt, user_message):
    """
    Send one user message to the Anthropic Messages API and return cleaned text.

    Only the first content block of the response is used. The literal
    '# About Me' header (with its two trailing newlines) is removed wherever it
    occurs, and surrounding whitespace is stripped.

    :param system_prompt: Text passed as the request's system prompt
    :param user_message: Text sent as the single user turn
    :return: Cleaned text of the first content block, or "" when there is no content
    """
    reply = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY).messages.create(
        model="claude-3-7-sonnet-20250219",
        max_tokens=1000,
        temperature=0.3,
        system=system_prompt,
        messages=[{"role": "user", "content": user_message}],
    )

    blocks = reply.content
    # Guard clause: nothing came back, so there is no text to clean.
    if not blocks or not (len(blocks) > 0):
        return ""

    first_text = blocks[0].text
    return first_text.replace('# About Me\n\n', '').strip()

In [8]:
def get_keywords(job_description, model):
    """
    Ask the model for the keywords of a job description.

    :param job_description: Job description text, embedded between '---' markers
    :param model: Prompt variant selector passed to keywords_extraction_prompt
    :return: Whatever execute_question returns for the request
    """
    jd_message = f"""
        Job Description:
        ---
        {job_description}
        ---
        """
    return execute_question(keywords_extraction_prompt(model), jd_message)


In [9]:
# Set your API key (None when the environment variable is not set)
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
# Path where the JD is stored
PDF_DIR = "JD_PATH"
# Name of the JD file
FILE_NAME = "jd_sample.pdf"
# os.path.join adds the path separator when PDF_DIR has no trailing slash;
# plain string concatenation would produce "JD_PATHjd_sample.pdf".
jd_path = os.path.join(PDF_DIR, FILE_NAME)
# Options for the prompt to use, I got them from 2 different platforms
# CLAUDE | CHATGPT
use_prompt = "CLAUDE"
# Resume PDF
PDF_RESUME_DIR = "PATH_OF_RESUME_PDF"
PDF_RESUME_NAME = "personalresume.PDF"

In [10]:
# Resume JSON (section definitions) location
RESUME_DIR = "PATH_OF_RESUME_JSON"
RESUME_NAME = "personal_resume.json"
# os.path.join adds the path separator when the directory has no trailing
# slash; plain string concatenation would glue directory and file name together.
resume_path = os.path.join(RESUME_DIR, RESUME_NAME)
pdf_path = os.path.join(PDF_RESUME_DIR, PDF_RESUME_NAME)

In [11]:
# Pull the raw text out of the job description and resume PDFs.
jd_text = extract_text_from_pdf(jd_path)
resume_text = extract_text_from_pdf(pdf_path)

# Keywords are requested only when the job description yielded some text.
if not jd_text:
    print("Could not extract text from the PDF.")
else:
    keywords = get_keywords(jd_text, use_prompt)
    print("\nExtracted Keywords:\n", keywords)


Extracted Keywords:
 [TextBlock(text="## KEYWORD ANALYSIS\n\n### 1. TECHNICAL REQUIREMENTS:\nTechnical project management, Agile, Scrum, Lean, Kanban, JIRA, Datadog, Confluence, Slack, cloud technologies, SDLC process methods, system architecture, technical frameworks\n\n### 2. EXPERIENCE & EXPERTISE:\n5+ years technical project management experience, Bachelor's degree in Computer Science/Engineering, project management certifications (PMP, PRINCE2, Certified Scrum Master), experience with marketplace-based business models, experience scaling products/systems\n\n### 3. JOB RESPONSIBILITIES:\nManaging high-priority projects, defining project scope/timeline/success criteria, identifying critical path dependencies, proactive risk identification, coordinating cross-functional teams, establishing metrics/reporting mechanisms, driving process improvements, ensuring QA sign-off\n\n### 4. PROFESSIONAL ATTRIBUTES:\nExceptional organizational skills, excellent verbal/written communication, prob

In [12]:
def parse_resume_json(file_path):
    """
    Load resume sections from a JSON file into a Pandas DataFrame.

    Each entry of the top-level "resume_sections" list becomes one row with the
    columns section_name, action, rules and section_text. Missing fields become
    empty strings; the "rules" list is joined with newlines.

    :param file_path: Path to the JSON file
    :return: Pandas DataFrame with one row per resume section
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    def to_record(entry):
        # Flatten one section dict into the row layout described above.
        return {
            "section_name": entry.get("section_name", ""),
            "action": entry.get("action", ""),
            "rules": "\n".join(entry.get("rules", [])),
            "section_text": entry.get("section_text", ""),
        }

    records = [to_record(entry) for entry in payload["resume_sections"]]
    return pd.DataFrame(records)

In [13]:
def parse_resume_json_dynamic(file_path):
    """
    Reads a JSON file containing resume sections and converts it into a Pandas DataFrame.

    Bullets are stored per section under numbered keys (section_text_bullet1,
    section_text_bullet2, ...). Every such key present in a section is collected
    in ascending numeric order and joined with newlines. There is no upper limit
    on the number of bullets, and gaps in the numbering are allowed.

    :param file_path: Path to the JSON file (top level must hold a "resume_sections" list)
    :return: Pandas DataFrame with the columns section_name, action, rules,
             section_text_header and section_text_bullets (one row per section)
    :raises KeyError: if the top-level "resume_sections" key is missing
    """
    bullet_prefix = "section_text_bullet"

    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    rows = []
    for section in data["resume_sections"]:
        # Collect (number, text) for every bullet key, however many there are.
        numbered_bullets = []
        for key, value in section.items():
            if not key.startswith(bullet_prefix):
                continue
            suffix = key[len(bullet_prefix):]
            # Accept only canonical positive integers ("1", "12"), not "0", "01" or "_x".
            if suffix.isdecimal() and str(int(suffix)) == suffix and int(suffix) >= 1:
                numbered_bullets.append((int(suffix), value))

        # Sort numerically so bullet10 follows bullet9 regardless of key order in the file.
        numbered_bullets.sort(key=lambda pair: pair[0])

        rows.append({
            "section_name": section.get("section_name", ""),
            "action": section.get("action", ""),
            "rules": "\n".join(section.get("rules", [])),
            "section_text_header": section.get("section_text_header", ""),
            "section_text_bullets": "\n".join(text for _, text in numbered_bullets),
        })

    # Convert to DataFrame
    return pd.DataFrame(rows)

In [14]:
# Parse the resume JSON into a DataFrame with one row per resume section.
df = parse_resume_json_dynamic(resume_path)

In [15]:
# `keywords` holds the content blocks returned by get_keywords(). Use each
# block's `.text` so the prompt carries the keyword text itself; str(block)
# would embed the "TextBlock(text=...)" repr instead. Blocks without a text
# attribute fall back to str().
keywords_text = ', '.join(getattr(block, "text", None) or str(block) for block in keywords)

# Iterate through all rows
for index, row in df.iterrows():
    is_cover_letter = row["section_name"] == "cover_letter"

    # Both kinds of section share the same action / rules / keywords preamble.
    system_prompt = "Action:" + row['action'] + "\n Rules:" + row['rules'] + "\n Keywords:" + keywords_text

    if is_cover_letter:
        # The cover letter is written from the whole resume against the full job description.
        system_prompt += "\n COMPLETE RESUME:" + resume_text
        user_message = "Job Description:" + jd_text
        print("#" * 50)  # Separator for better readability
    else:
        # Every other section is rewritten from its own header and bullets.
        user_message = row['section_text_header'] + row["section_text_bullets"]

    response = execute_question2(system_prompt, user_message)
    print(row["section_name"])
    print(response)
    print(("#" if is_cover_letter else "-") * 50)  # Separator for better readability


about
I've updated your "about" section to align with the Product Manager role at Infosys, incorporating relevant keywords from the job description:

Strategic Technical Program Manager with 5+ years experience leading enterprise data platforms and digital transformation initiatives. Expertise in Agile/Scrum methodologies, JIRA, and full SDLC processes while driving cross-functional engineering teams across Media & Entertainment and Retail industries. Proven track record in project scope definition, risk identification, and establishing metrics for success. Consistently deliver multimillion-dollar cost savings through innovative technical solutions and process improvements. Core strengths in stakeholder management, vendor partnerships, and scaling products/systems with an operational, results-driven mindset.
--------------------------------------------------
skills_section
## Skills

**Leadership:** Product Management | Agile Framework | Cross-functional Team Leadership | Technical Pro