# 📄 Resume Ranking NLP Model
This notebook ranks resumes based on how well they match a job description using NLP techniques.

In [None]:
# Install required packages (if not already installed)
# !pip install pandas scikit-learn nltk

import pandas as pd
from src.preprocess import clean_text
from src.ranker import rank_resumes
import nltk
# Download the NLTK 'stopwords' and 'wordnet' resources.
# NOTE(review): presumably required by clean_text in src.preprocess — confirm.
nltk.download('stopwords')
nltk.download('wordnet')


In [None]:
# Read the resume CSV and discard rows that have no resume text
df = pd.read_csv('data/resume_dataset/UpdatedResumeDataSet.csv')
df.dropna(subset=['Resume'], inplace=True)

# Keep the raw text in 'Resume'; store the clean_text output alongside it
df['cleaned_resume'] = df['Resume'].map(clean_text)

# Preview the first rows (rendered as this cell's output)
df.head()


In [None]:
# Job description the resumes are ranked against; edit this text to target
# a different role.
job_description = (
    "Looking for a data analyst with experience in Python, SQL, "
    "machine learning, and dashboard tools like Tableau or Power BI."
)
# Run the description through clean_text, as is done for the resume text
job_cleaned = clean_text(job_description)


In [None]:
# Rank resumes using TF-IDF and cosine similarity (as implemented by
# rank_resumes in src.ranker). Every cleaned resume is compared against the
# cleaned job description.
resume_texts = df['cleaned_resume'].tolist()
ranked_indices, scores = rank_resumes(resume_texts, job_cleaned)

# Display top results
top_k = 5
# Slice instead of indexing 0..top_k-1 so that a dataset with fewer than
# top_k resumes prints what is available rather than raising IndexError.
for position, idx in enumerate(ranked_indices[:top_k]):
    # NOTE(review): scores is read by rank position, which assumes
    # rank_resumes returns scores ordered to match ranked_indices — confirm
    # against src.ranker; if scores follow input order, use scores[idx].
    print(f"{position + 1}. Score: {scores[position]:.4f}")
    # iloc is positional, matching the row order resume_texts was built from
    print(df.iloc[idx]['Resume'][:400])
    print("---\n")
