In [1]:

import pandas as pd
import pickle
import re
import nltk
from nltk.tokenize import word_tokenize

# Make sure the 'punkt' tokenizer data is available locally; it is downloaded
# here presumably because word_tokenize (imported above) needs it.
nltk.download('punkt')


[nltk_data] Downloading package punkt to /Users/abhishek/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:

# Read the sample resumes CSV into a DataFrame.
# NOTE: this is a machine-specific absolute path; adjust it to wherever the CSV lives.
SAMPLE_RESUMES_CSV = "/Users/abhishek/Downloads/sample_resumes.csv"
sample_df = pd.read_csv(SAMPLE_RESUMES_CSV)


In [3]:

# Unpickle the trained naive keyword model (iterated later as job title -> keywords).
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# load this file when it comes from a trusted source.
CATEGORY_KEYWORDS_PKL = "/Users/abhishek/Projects/aipi540/resume_optimization/lllm-resume-optimizer/models/category_keywords.pkl"
with open(CATEGORY_KEYWORDS_PKL, "rb") as model_file:
    category_keywords = pickle.load(model_file)


In [4]:

# Text cleaning function
def clean_text(text):
    """Normalize raw text and split it into word tokens.

    The input is lower-cased, every character other than ``a``-``z`` and
    whitespace is deleted, and the remainder is tokenized with NLTK's
    ``word_tokenize``.

    Args:
        text: Raw text. Non-string values are converted with ``str()``.
            Missing values (``None`` or a float NaN, e.g. an empty cell read
            by pandas) are treated as empty text.

    Returns:
        list[str]: The tokens, or ``[]`` when the input is missing.
    """
    # `text != text` is true only for NaN. Without this guard a missing resume
    # would be stringified to "none"/"nan" and then scored as a real word.
    if text is None or (isinstance(text, float) and text != text):
        return []
    lowered = str(text).lower()
    # Characters are deleted, not replaced by a space, so "scikit-learn"
    # becomes the single token "scikitlearn".
    letters_only = re.sub(r'[^a-z\s]', '', lowered)
    return word_tokenize(letters_only)

# Match resume to top job title using keyword overlap
def match_resume_keywords(resume_words, keyword_map=None):
    """Return the job title whose keywords overlap most with a resume.

    Each job title is scored as
    ``len(set(resume_words) & set(keywords)) / len(keywords)``; a title with
    no keywords scores 0.

    Args:
        resume_words: Iterable of tokens from the resume.
        keyword_map: Optional mapping of job title -> collection of keywords.
            When omitted, the notebook-level ``category_keywords`` is used,
            which preserves the original one-argument call.

    Returns:
        tuple: ``(job_title, score)`` for the highest-scoring title. On ties
        the title that appears first in the mapping wins.

    Raises:
        ValueError: If the keyword mapping is empty, since there is nothing
            to match against.
    """
    if keyword_map is None:
        keyword_map = category_keywords
    if not keyword_map:
        raise ValueError("keyword mapping is empty; cannot match resume to a job title")

    # Build the resume vocabulary once instead of once per job title.
    resume_vocab = set(resume_words)

    match_scores = []
    for job_title, keywords in keyword_map.items():
        common = resume_vocab & set(keywords)
        score = len(common) / len(keywords) if keywords else 0
        match_scores.append((job_title, score))

    # max() returns the first maximal item, which matches what a stable
    # descending sort followed by [0] would pick.
    return max(match_scores, key=lambda pair: pair[1])


In [7]:

# Evaluate Top-1 Accuracy
# A prediction counts as correct only when it equals the actual title after
# trimming surrounding whitespace and lower-casing both strings.
results = []
correct = 0  # must be initialized in this cell: the loop below increments it

for actual_title, resume_text in zip(sample_df["title"], sample_df["resume_content"]):
    resume_words = clean_text(resume_text)
    predicted_title, score = match_resume_keywords(resume_words)
    # str() keeps a missing (non-string) title from crashing on .strip().
    is_correct = int(
        str(actual_title).strip().lower() == str(predicted_title).strip().lower()
    )
    correct += is_correct
    results.append({
        "actual_title": actual_title,
        "predicted_title": predicted_title,
        "match_score": round(score * 100, 2),  # overlap fraction -> percentage
        "is_correct": is_correct,
    })

# Explicit columns keep the frame's schema stable even if sample_df is empty.
results_df = pd.DataFrame(
    results,
    columns=["actual_title", "predicted_title", "match_score", "is_correct"],
)

# Fraction of resumes whose predicted title exactly matches the actual title.
top1_accuracy = correct / len(results) if results else 0.0
print(f"Top-1 accuracy: {correct}/{len(results)} = {top1_accuracy:.1%}")


In [8]:

# Display the actual title, predicted title and match score for each resume.
display_columns = ["actual_title", "predicted_title", "match_score"]
results_df[display_columns]


Unnamed: 0,actual_title,predicted_title,match_score
0,Data Scientist,Data Scientist - FinTech,40.0
1,"Senior Data Scientist, FP&A","Senior Data Scientist, FP&A",30.0
2,BI Developer (Tableau),Business Intelligence Developer/Analyst,30.0
3,Search & Information Retrieval Engineer,BIG Data Engineer,30.0
4,Machine Learning Engineer,Machine Learning Infrastructure Engineer,50.0
5,Software Engineer (Backend),BIG Data Engineer,30.0
6,NLP Research Intern,Natural Language Processing Expert,35.0
7,Cloud Solutions Architect,Global Solutions Architect,30.0
8,Data Analyst,Data Analyst,40.0
9,Product Manager (AI),Product Manager- Business Intelligence,35.0


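## 8 of the 10 samples are matched to a closely related predicted title (judged by reading the table above; only 2 of them, rows 1 and 8, are exact title matches)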