# Extract text from resumes

In [17]:
import pdfplumber
import docx2txt

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, joined by newlines.

    Pages whose ``extract_text()`` result is falsy (``None`` or an empty
    string) are skipped.

    Args:
        pdf_path: Path of the PDF file; passed straight to ``pdfplumber.open``.

    Returns:
        str: The page texts joined with ``"\\n"``; an empty string when no
        page yields any text.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Extract each page exactly once; the same result serves both the
        # emptiness check and the join (the original extracted twice).
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(text for text in page_texts if text)

def extract_text_from_docx(docx_path):
    """Return whatever ``docx2txt.process`` extracts from the file at *docx_path*."""
    document_text = docx2txt.process(docx_path)
    return document_text

def extract_resume_text(file_path):
    """Extract the text of a resume, dispatching on the file extension.

    Args:
        file_path: Path string of the resume; must end in ``.pdf`` or
            ``.docx`` (matched case-insensitively).

    Returns:
        The text returned by ``extract_text_from_pdf`` or
        ``extract_text_from_docx`` for the given file.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    # Compare on a lower-cased copy so "Resume.PDF" / "cv.Docx" are accepted;
    # the original path is still what gets handed to the extractor.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        return extract_text_from_pdf(file_path)
    if lowered_path.endswith(".docx"):
        return extract_text_from_docx(file_path)
    raise ValueError("Unsupported file format! Use PDF or DOCX.")

# Convert Resume and JD into embeddings

In [None]:
import os
# Placeholder value only — never commit a real key here. setdefault keeps a
# key that is already exported in the environment instead of overwriting it
# with the dummy string.
os.environ.setdefault("OPENAI_API_KEY", "your_api_key")

In [19]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Shared embeddings client, created once with default arguments;
# get_text_embedding calls embed_query on it.
embeddings = OpenAIEmbeddings()

def get_text_embedding(text):
    """Embed *text* with the module-level ``embeddings`` client and return the result."""
    vector = embeddings.embed_query(text)
    return vector

# LLM prompt for recommendations

In [20]:
# Instruction text that analyze_resume_with_llm places at the top of its
# prompt: it sets the headhunter persona and asks for a JSON object with the
# keys "key_strengths", "missing_skills" and "recommendations".
llm_context = """
You are an experienced headhunter specializing in helping early-career and associate-level professionals
optimize their resumes for job applications. Your goal is to analyze a candidate's resume and compare it to
a job description to provide insightful recommendations.

Your output should be structured as follows:

{
  "key_strengths": [
    "Highlight relevant skills & experiences in the resume, focusing on alignment with the job description."
  ],
  "missing_skills": [
    "Identify critical missing skills based on the JD, prioritizing technical skills, tools, or industry-specific knowledge."
  ],
  "recommendations": [
    "Provide specific suggestions on improving the resume. Include example modifications such as what bullet point to update and how it should be reworded."
  ]
}

Use clear, professional, and motivating language. Avoid generic terms and focus on industry-specific skills.
Your output should be worded towards the candidate using phrases such as:
- 'You should consider adding...'
- 'Your resume would benefit from including...'
- 'It may be helpful to highlight your experience with...'

Ensure the response is a **valid JSON object** with no additional text.
STRICT RULE: Do not include extra commentary, explanations, or markdown formatting. ONLY return the JSON response.
"""

In [21]:
from openai import OpenAI
import json

# Shared OpenAI client, created with default arguments;
# analyze_resume_with_llm sends its chat completion request through it.
client = OpenAI()

def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence (``` or ```json) from *text*, if present."""
    text = text.strip()
    if not text.startswith("```"):
        return text
    # Drop the whole opening fence line, which may carry a language tag.
    _, _, text = text.partition("\n")
    text = text.rstrip()
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def analyze_resume_with_llm(resume_text, jd_text, model="gpt-4"):
    """Ask the chat model to compare a resume with a job description.

    The prompt is ``llm_context`` followed by the resume and the job
    description, sent as a single user message through the module-level
    ``client``.

    Args:
        resume_text: Plain text of the candidate's resume.
        jd_text: Plain text of the job description.
        model: Chat model name passed to the API. Defaults to ``"gpt-4"``,
            the value that was previously hard-coded.

    Returns:
        dict | None: The parsed JSON object from the model's reply, or
        ``None`` (after printing an error) when the reply is empty, is not
        valid JSON, or is JSON that is not an object.
    """
    prompt = f"""
    {llm_context}

    Candidate's Resume:
    {resume_text}

    Job Description:
    {jd_text}

    Analyze the resume based on the job description and return the structured JSON response.
    """

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    # The message content is optional in the API response; treat a missing
    # value as an empty reply instead of crashing on None.strip().
    llm_output = (response.choices[0].message.content or "").strip()

    try:
        # Models sometimes wrap the JSON in a Markdown fence despite the
        # instructions, so unwrap it before parsing.
        parsed_response = json.loads(_strip_code_fence(llm_output))
    except json.JSONDecodeError:
        print("🚨 Error: LLM response is not valid JSON.")
        return None

    # Callers use dict methods on the result, so a bare list/string/number
    # is as unusable as malformed JSON.
    if not isinstance(parsed_response, dict):
        print("🚨 Error: LLM response is not valid JSON.")
        return None
    return parsed_response


# Cosine similarity for % matching

In [22]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def calculate_match_score(resume_text, jd_text):
    """Return the cosine similarity of the resume and JD embeddings as a percentage.

    Both texts are embedded with ``get_text_embedding``; the similarity is
    multiplied by 100 and rounded to two decimal places.
    """
    resume_vector = get_text_embedding(resume_text)
    jd_vector = get_text_embedding(jd_text)

    # Each embedding is wrapped in a list so it is passed as a one-row matrix;
    # the single resulting similarity sits at position [0][0].
    similarity_matrix = cosine_similarity([resume_vector], [jd_vector])
    percentage = similarity_matrix[0][0] * 100
    return round(percentage, 2)

# Find missing skills

In [23]:
def find_missing_skills(resume_text, jd_text):
    """Return the skills the LLM reports as missing from the resume.

    Args:
        resume_text: Plain text of the candidate's resume.
        jd_text: Plain text of the job description.

    Returns:
        str: The "missing_skills" entries from the LLM analysis, one per
        line, or ``"No missing skills detected."`` when the analysis failed
        or lists no missing skills.
    """
    analysis = analyze_resume_with_llm(resume_text, jd_text)

    # analyze_resume_with_llm returns parsed JSON (a dict) or None, so read
    # the "missing_skills" key rather than searching text for Markdown
    # markers, which raised AttributeError on every call.
    missing_skills = analysis.get("missing_skills") if isinstance(analysis, dict) else None
    if isinstance(missing_skills, str):
        missing_skills = [missing_skills]
    elif not isinstance(missing_skills, (list, tuple)):
        missing_skills = []

    entries = [str(skill).strip() for skill in missing_skills]
    entries = [entry for entry in entries if entry]
    if not entries:
        return "No missing skills detected."
    return "\n".join(entries)

# Test Case

In [24]:
# Demo run: extract the resume text, score it against a sample job description
# and print the LLM's strengths / missing skills / recommendations.
if __name__ == "__main__":
    resume_path = "Currea_Jose_Resume.pdf"
    jd_text = """Position Summary

The Senior Analyst, Strategy & Analytics is responsible for the development and implementation of data projects and automated reporting tools. This role will develop processes for accurate data collection, modeling, analysis, visualization, and reporting. This role is part of the International Planning organization and contributes to initiatives that drive sales and revenue growth, increased profitability, and long-term strategies across international markets. Success in this role will require that the candidate have strong technical skills as well as the ability to partner and communicate effectively with international stakeholders and cross-functional business teams.

Essential Duties And Responsibilities

 Assist in the development of data-driven strategies and tactics to optimize revenue, return on investment, and profitability. 
 Develop reporting that measures performance trends and drivers, forecast and revenue track, and revenue performance by channel. 
 Collect, analyze, and interpret data to support business strategies, uncover insights, and identify trends. 
 Assist in developing and implementing strategies that align with the organization’s goals and objectives. Support strategic projects with data analysis and performance reporting. 
 Contribute to initiatives that generate new insights through analytics. Ensure those insights translate into actionable plans that create revenue and growth opportunities. 
 Prepare and present comprehensive reports and executive presentations that summarize insights and recommendations to stakeholders at all levels. 
 Develop clear and impactful recommendations based on data findings that address key business challenges and opportunities for improvement. 
 Conduct market analysis to identify industry trends, customer behaviors, and competitive positioning, helping the organization stay ahead of the curve. 
 Translate complex data into clear, actionable recommendations that inform key business decisions. 
 Present insights to executive leadership in a clear and compelling manner to facilitate decision-making. 
 Identify process improvements and optimization opportunities across analytics and reporting functions. 
 Work closely with IT and Enterprise BI teams to ensure data quality and stability, and implementation of new reporting tools and improvements. 
 Is part of a team responsible for reporting, analysis, and visualization for international markets and in-market sales and commercial teams. 

Qualifications, Knowledge, And Skills

 Bachelor’s degree in computer science, analytics, mathematics, statistics, or similar 
 2+ years’ experience leveraging data to drive business decision making 
 Bachelor’s degree in computer science, analytics, mathematics, statistics, or similar 
 2+ years’ experience leveraging data to drive business decision making. 
 Strong knowledge of SQL 
 Experience in data visualization tools such as Tableau, Power BI 
 Knowledge of AWS, Azure, and data analytics tools like SQL, SAS, Python and Databricks 
 Experience with Power Automate 
 Excellent written and oral communication skills 
 Ability to work well with non-technical business partners. 
 Ability to deliver results in timely manner with consistency and precision. 
 Ability to work independently or as part of a collaborative team. 
 Ability to tackle challenging projects and to quickly gain knowledge needed to deliver results. 
 Willingness and ability to tackle challenging projects and to quickly gain knowledge needed to deliver results. 

We know there's a lot to consider. As you go through the application process, our recruiters will be glad to provide guidance, and more relevant details to answer any additional questions. Thank you again for your interest in Royal Caribbean Group. We'll hope to see you onboard soon!"""

    resume_text = extract_resume_text(resume_path)
    if not resume_text.strip():
        print("🚨 Error: Resume text is empty! Check the extraction function.")
        # exit() is the interactive helper injected by the site module and
        # would report success; raise SystemExit with a failure status instead.
        raise SystemExit(1)

    match_score = calculate_match_score(resume_text, jd_text)

    parsed_response = analyze_resume_with_llm(resume_text, jd_text)

    if parsed_response:
        # "or []" also covers keys the LLM returned as null, which .get()'s
        # default alone would pass through as None and break the loops below.
        key_strengths = parsed_response.get("key_strengths") or []
        missing_skills = parsed_response.get("missing_skills") or []
        recommendations = parsed_response.get("recommendations") or []

        print(f"\n✅ **Match Score:** {match_score}%\n")

        print("💪 **Key Strengths:**")
        for strength in key_strengths:
            print(f"   • {strength}")

        print("\n🔍 **Missing Skills:**")
        if missing_skills:
            for skill in missing_skills:
                print(f"   • {skill}")
        else:
            print("   ✅ No missing skills detected!")

        print("\n💡 **Recommendations:**")
        for recommendation in recommendations:
            print(f"   • {recommendation}")

    else:
        print("🚨 Error processing the LLM response. Ensure JSON format is correct.")



✅ **Match Score:** 77.92%

💪 **Key Strengths:**
   • You have a strong background in data analysis with tools like Python, SQL, SAS and visualization tools like Power BI and Tableau which aligns well with the job requirements.
   • Your experience in using data to inform business strategies, as demonstrated at Bank of Bogota and Merqueo, is highly relevant to the role.
   • Your exposure to working with cross-functional teams and presenting insights to executive leadership speaks to your ability to communicate effectively with non-technical business partners and stakeholders.

🔍 **Missing Skills:**
   • Your resume lacks explicit mention of experience with Azure, although you have used comparable tools such as AWS.
   • There's no mention of experience with Power Automate which is named in the job description.
   • While you have worked on multiple data projects, it would be beneficial to specify instances where you have actively developed and implemented data collection processes, mo