In [1]:
import re
import random
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import spacy

# Load spaCy's small English pipeline once at module level; preprocess_text
# reuses this object for tokenisation. The "en_core_web_sm" model package has
# to be installed in the environment for this call to succeed.
nlp = spacy.load("en_core_web_sm")

# Stop-word collection taken from the loaded pipeline's language defaults;
# preprocess_text filters lower-cased tokens against it.
STOPWORDS = nlp.Defaults.stop_words

# Function to clean and preprocess text
def preprocess_text(text):
    """Tokenise ``text`` and keep only its meaningful word tokens.

    The text is lower-cased, run through the module-level spaCy pipeline
    ``nlp``, and every token that is not purely alphabetic or that appears
    in ``STOPWORDS`` is dropped.

    Args:
        text (str): Raw input text.

    Returns:
        list[str]: Surviving token strings, in document order (duplicates kept).
    """
    kept_words = []
    for tok in nlp(text.lower()):
        # Skip punctuation, numbers and mixed alphanumeric tokens.
        if not tok.is_alpha:
            continue
        word = tok.text
        # Skip stop words.
        if word in STOPWORDS:
            continue
        kept_words.append(word)
    return kept_words

# Extract keywords from text
def extract_keywords(text):
    """Return the distinct keywords found in ``text``.

    Args:
        text (str): Raw input text.

    Returns:
        set[str]: Unique tokens produced by ``preprocess_text`` for ``text``.
    """
    return {keyword for keyword in preprocess_text(text)}

# Generate synthetic data for testing purposes
def generate_synthetic_resume():
    """Build a short random resume snippet for demo/testing purposes.

    One skill and one experience sentence are drawn independently with
    ``random.choice`` and joined as ``"<skill>. <experience sentence>"``.

    Returns:
        str: The generated snippet, ending with exactly one full stop.
    """
    skills = ["Python", "Machine Learning", "Data Analysis", "SQL", "TensorFlow", "Deep Learning", "Pandas", "NLP"]
    experiences = [
        "Worked on machine learning models.",
        "Performed data analysis using Python.",
        "Experience in TensorFlow and PyTorch.",
        "Strong knowledge of data pipelines."
    ]
    # Every experience sentence already ends with a full stop, so the template
    # must not append another one (that produced a trailing "..").
    return f"{random.choice(skills)}. {random.choice(experiences)}"

def generate_synthetic_job_description():
    """Build a random one-line job description for demo/testing purposes.

    A role is picked with ``random.choice`` and five distinct skills are
    picked with ``random.sample``; the role is drawn first.

    Returns:
        str: ``"Job role: <role>. Required skills: <s1>, <s2>, <s3>, <s4>, <s5>."``
    """
    role_options = [
        "Machine Learning Engineer",
        "Data Scientist",
        "Software Developer",
        "AI Researcher",
    ]
    skill_pool = [
        "Python",
        "TensorFlow",
        "SQL",
        "Data Visualization",
        "Deep Learning",
        "NLP",
        "Pandas",
        "Scikit-learn",
        "Big Data",
        "Artificial Intelligence",
    ]
    # Draw the role before the skills so the RNG is consumed in a fixed order.
    chosen_role = random.choice(role_options)
    chosen_skills = random.sample(skill_pool, 5)
    return "Job role: {}. Required skills: {}.".format(chosen_role, ", ".join(chosen_skills))

# Evaluate resume against the job description
def evaluate_resume(resume_text, job_description):
    """Compare a resume with a job description.

    Two independent measures are computed: the keyword overlap based on
    ``extract_keywords`` and the cosine similarity of the TF-IDF vectors of
    the two raw texts.

    Args:
        resume_text (str): Raw resume text.
        job_description (str): Raw job-description text.

    Returns:
        dict: A dictionary with the keys
            ``"match_percentage"`` - share (0-100) of job-description keywords
            that also occur in the resume; ``0.0`` when the job description
            yields no keywords.
            ``"similarity_score"`` - TF-IDF cosine similarity scaled by 100;
            ``0.0`` when no TF-IDF vocabulary can be built from the texts.
            ``"matching_keywords"`` - set of keywords present in both texts.
            ``"missing_keywords"`` - set of job keywords absent from the resume.
    """
    # Extract keywords from both texts
    resume_keywords = extract_keywords(resume_text)
    job_keywords = extract_keywords(job_description)

    # Find matching and missing keywords
    matching_keywords = resume_keywords.intersection(job_keywords)
    missing_keywords = job_keywords.difference(resume_keywords)

    # An empty / stop-word-only job description has no keywords; report 0%
    # instead of raising ZeroDivisionError.
    if job_keywords:
        match_percentage = len(matching_keywords) / len(job_keywords) * 100
    else:
        match_percentage = 0.0

    # Use TF-IDF for cosine similarity comparison
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([resume_text, job_description])
    except ValueError:
        # Raised by TfidfVectorizer when neither text contributes a usable
        # term (empty vocabulary); treat that as "no similarity".
        similarity = 0.0
    else:
        similarity = cosine_similarity(tfidf_matrix)[0, 1] * 100

    return {
        "match_percentage": match_percentage,
        "similarity_score": similarity,
        "matching_keywords": matching_keywords,
        "missing_keywords": missing_keywords
    }

# Main execution with synthetic data
if __name__ == "__main__":
    # Demo run: generate one random resume / job-description pair, evaluate
    # it, and print a small report.
    print("=== Resume and Job Description Evaluator with NLP Enhancements ===")

    # Generate synthetic test data
    synthetic_resume = generate_synthetic_resume()
    synthetic_job_desc = generate_synthetic_job_description()

    print("\nSynthetic Resume:")
    print(synthetic_resume)
    print("\nSynthetic Job Description:")
    print(synthetic_job_desc)

    # Evaluate the resume
    feedback = evaluate_resume(synthetic_resume, synthetic_job_desc)

    # Display results
    print("\n=== Feedback ===")
    print(f"Match Percentage: {feedback['match_percentage']:.2f}%")
    print(f"Cosine Similarity: {feedback['similarity_score']:.2f}")
    # The keyword collections are sets, whose iteration order is arbitrary;
    # sort them so the report is stable from run to run.
    print(f"Matching Keywords: {', '.join(sorted(feedback['matching_keywords']))}")
    print(f"Missing Keywords: {', '.join(sorted(feedback['missing_keywords']))}")


=== Resume and Job Description Evaluator with NLP Enhancements ===

Synthetic Resume:
Data Analysis. Experience in TensorFlow and PyTorch..

Synthetic Job Description:
Job role: AI Researcher. Required skills: Big Data, SQL, TensorFlow, Deep Learning, NLP.

=== Feedback ===
Match Percentage: 15.38%
Cosine Similarity: 11.91
Matching Keywords: data, tensorflow
Missing Keywords: role, required, ai, deep, job, learning, skills, big, researcher, sql, nlp


In [9]:
!pip install spacy sentence-transformers 

Collecting sentence-transformers
  Downloading sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)
Collecting numpy>=1.19.0 (from spacy)
  Using cached numpy-2.0.2-cp312-cp312-win_amd64.whl.metadata (59 kB)
Downloading sentence_transformers-3.3.1-py3-none-any.whl (268 kB)
Using cached numpy-2.0.2-cp312-cp312-win_amd64.whl (15.6 MB)
Installing collected packages: numpy, sentence-transformers
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
Successfully installed numpy-2.0.2 sentence-transformers-3.3.1


  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
contourpy 1.2.0 requires numpy<2.0,>=1.20, but you have numpy 2.0.2 which is incompatible.
gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.0.2 which is incompatible.
pennylane-qiskit 0.39.1 requires sympy<1.13, but you have sympy 1.13.1 which is incompatible.


In [14]:
import os
import re

# Disable the symlink warning for the Hugging Face cache. huggingface_hub
# reads this variable when it is first imported, so it must be set before
# sentence_transformers (which pulls in huggingface_hub) is imported.
os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'

import tensorflow as tf
from sentence_transformers import SentenceTransformer, util

# NOTE(review): this call switches TensorFlow from eager to graph (TF1-style)
# execution for the whole process; it does not merely silence a warning.
# Nothing in this cell uses TensorFlow directly - confirm it is still needed.
tf.compat.v1.disable_eager_execution()

class ResumeJobEvaluator:
    """Score how closely a resume matches a job description.

    Both texts are normalised, embedded with a SentenceTransformer model and
    compared by cosine similarity; ``provide_feedback`` turns the score into
    a short printed recommendation.
    """

    # Similarity cut-offs used by provide_feedback:
    #   score <  GOOD_MATCH_THRESHOLD       -> needs improvement
    #   score <  EXCELLENT_MATCH_THRESHOLD  -> good match
    #   otherwise                           -> excellent match
    GOOD_MATCH_THRESHOLD = 0.7
    EXCELLENT_MATCH_THRESHOLD = 0.85

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the embedding model.

        Args:
            model_name (str): Name of the SentenceTransformer model to load.
                Defaults to ``'all-MiniLM-L6-v2'``.
        """
        self.model = SentenceTransformer(model_name)

    def clean_text(self, text):
        """Normalise ``text`` for embedding.

        Lower-cases the text, deletes every character that is neither a word
        character nor whitespace, then collapses whitespace runs to a single
        space and trims the ends.

        Args:
            text (str): Raw text.

        Returns:
            str: The normalised text.
        """
        text = text.lower()
        # Strip punctuation BEFORE collapsing whitespace: deleting a
        # free-standing punctuation mark (e.g. " - ") otherwise leaves a
        # double space or a dangling edge space behind.
        text = re.sub(r'[^\w\s]', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text

    def get_similarity_score(self, resume, job_description):
        """Return the cosine similarity between the two texts' embeddings.

        Args:
            resume (str): Raw resume text.
            job_description (str): Raw job-description text.

        Returns:
            float: Cosine similarity of the embeddings of the cleaned texts.
        """
        resume_clean = self.clean_text(resume)
        job_clean = self.clean_text(job_description)

        resume_embedding = self.model.encode(resume_clean, convert_to_tensor=True)
        job_embedding = self.model.encode(job_clean, convert_to_tensor=True)

        # .item() unwraps the single similarity value into a Python number.
        similarity = util.cos_sim(resume_embedding, job_embedding).item()
        return similarity

    def provide_feedback(self, resume, job_description):
        """Print the similarity score and a recommendation tier for it.

        Args:
            resume (str): Raw resume text.
            job_description (str): Raw job-description text.

        Returns:
            None: The feedback is written to standard output only.
        """
        similarity_score = self.get_similarity_score(resume, job_description)
        print(f"Similarity Score: {similarity_score:.4f}")

        if similarity_score < self.GOOD_MATCH_THRESHOLD:
            print("\nFeedback for Improvement:")
            print("- Your experience may not closely match the job description.")
            print("- Highlight more relevant skills and experiences.")
        elif similarity_score < self.EXCELLENT_MATCH_THRESHOLD:
            print("\nGood Match, but some improvements could be made:")
            print("- Focus on aligning your work experience more closely with the job role requirements.")
        else:
            print("\nExcellent match! Your resume aligns well with the job description.")

# Synthetic resume / job-description pair used as a smoke test for the
# evaluator. Both texts mention Python, NLP, TensorFlow and PyTorch.
synthetic_resume = """
Experienced software engineer with a strong background in machine learning and NLP. 
Proficient in Python, PyTorch, and TensorFlow. Worked on multiple AI-driven projects.
Skills: Python, Machine Learning, NLP, Data Science, TensorFlow, PyTorch
"""

synthetic_job_description = """
Looking for a Machine Learning Engineer with expertise in Natural Language Processing (NLP), Python, 
TensorFlow, and PyTorch. Must have hands-on experience working with transformer models and AI projects.
"""

# Instantiating the evaluator constructs its SentenceTransformer model.
evaluator = ResumeJobEvaluator()
# Prints the similarity score followed by the feedback tier it falls into.
evaluator.provide_feedback(synthetic_resume, synthetic_job_description)



Similarity Score: 0.8236

Good Match, but some improvements could be made:
- Focus on aligning your work experience more closely with the job role requirements.


In [15]:
# Scenario: a Data Scientist resume scored against a Data Scientist posting.
synthetic_resume = """
Data Scientist with expertise in machine learning, deep learning, and natural language processing. Proficient in Python, Scikit-learn, Keras, and Pandas. Experience working on large-scale data processing and predictive modeling projects.

"""

synthetic_job_description = """
We are seeking a Data Scientist with a strong background in machine learning, NLP, and deep learning. Must be proficient in Python, Scikit-learn, TensorFlow, and data analysis tools. Experience with predictive modeling, large-scale data processing, and cloud deployment.

"""

# Reuse the `evaluator` created in the cell that defines ResumeJobEvaluator;
# building a new instance would load the SentenceTransformer model again.
evaluator.provide_feedback(synthetic_resume, synthetic_job_description)

Similarity Score: 0.8069

Good Match, but some improvements could be made:
- Focus on aligning your work experience more closely with the job role requirements.


In [16]:
# Scenario: a Marketing Specialist resume scored against a Data Scientist
# posting.
synthetic_resume = """
Marketing Specialist with experience in content creation, digital marketing, and SEO. Proficient in tools like Google Analytics, Adobe Creative Suite, and social media platforms. Knowledge of basic data analysis but not focused on machine learning or deep learning.


"""

synthetic_job_description = """
We are seeking a Data Scientist with a strong background in machine learning, NLP, and deep learning. Must be proficient in Python, Scikit-learn, TensorFlow, and data analysis tools. Experience with predictive modeling, large-scale data processing, and cloud deployment.

"""

# Reuse the `evaluator` created in the cell that defines ResumeJobEvaluator;
# building a new instance would load the SentenceTransformer model again.
evaluator.provide_feedback(synthetic_resume, synthetic_job_description)

Similarity Score: 0.5333

Feedback for Improvement:
- Your experience may not closely match the job description.
- Highlight more relevant skills and experiences.
