## Step 2: Resume Parsing and Text Extraction

In [1]:
import os
import docx2txt  # For DOCX files
import PyPDF2    # For PDF files

# Relative path of the folder that holds the resume files to process
resume_dir = "resume/"

In [2]:

# Function to read DOCX or PDF files
def extract_text(file_path):
    """Return the text content of a DOCX or PDF file.

    Parameters
    ----------
    file_path : str
        Path to the file. The file type is chosen from the extension,
        compared case-insensitively (``CV.PDF`` is treated like ``cv.pdf``).

    Returns
    -------
    str or None
        For ``.docx`` files, whatever ``docx2txt.process`` returns.
        For ``.pdf`` files, the text of all pages concatenated in page order.
        ``None`` when the extension is neither ``.docx`` nor ``.pdf``.
    """
    lowered_path = file_path.lower()
    if lowered_path.endswith('.docx'):
        return docx2txt.process(file_path)
    if lowered_path.endswith('.pdf'):
        # The reader pulls data from the open handle, so every page must be
        # read before the `with` block closes the file.
        with open(file_path, 'rb') as f:
            pdf_reader = PyPDF2.PdfReader(f)
            # Guard against a page yielding no text (None): `or ''` keeps
            # the join from raising TypeError on such pages.
            return ''.join(page.extract_text() or '' for page in pdf_reader.pages)
    return None

# Loop through all resumes and extract text
resume_texts = {}
# os.listdir returns entries in arbitrary order; sorting makes the insertion
# order of resume_texts reproducible from run to run.
for filename in sorted(os.listdir(resume_dir)):
    # Compare the extension case-insensitively so 'CV.PDF' is not overlooked.
    if not filename.lower().endswith(('.pdf', '.docx')):
        continue
    file_path = os.path.join(resume_dir, filename)
    extracted = extract_text(file_path)
    # extract_text signals an unsupported file with None; leave such files
    # out so later steps only ever see real text.
    if extracted is None:
        continue
    resume_texts[filename] = extracted

## Step 3: Keyword Extraction Based on Job Criteria
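Define the job criteria (required skills and a minimum number of years of experience) as a Python dictionary, then score each resume by how many of the required skills it mentions. Note that only the skills are scored at this stage; the experience threshold is defined but not yet applied.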

In [3]:
# Requirements each resume is screened against
job_criteria = dict(
    skills=["Python", "Machine Learning", "Data Analysis"],
    experience=3,  # minimum number of years required
)

In [4]:
# Function to check for keywords in resumes
def keyword_match(resume_text, job_criteria):
    """Count how many required skills are mentioned in a resume.

    Matching is a case-insensitive substring test, so a skill counts as
    present if its text occurs anywhere in the resume.

    Parameters
    ----------
    resume_text : str or None
        Extracted resume text. ``None`` (no text available) scores 0.
    job_criteria : dict
        Must contain a ``"skills"`` key holding an iterable of skill names.
        Other keys (such as ``"experience"``) are not used here.

    Returns
    -------
    int
        Number of entries in ``job_criteria["skills"]`` found in the text.
    """
    skills = job_criteria["skills"]
    if resume_text is None:
        return 0
    # Lowercase the resume once instead of once per skill.
    text_lower = resume_text.lower()
    # Additional criteria can be added here (like experience)
    return sum(1 for skill in skills if skill.lower() in text_lower)

In [5]:
# Score every resume against the job criteria, keyed by file name
resume_scores = dict(
    (name, keyword_match(body, job_criteria))
    for name, body in resume_texts.items()
)
# Order the (file name, score) pairs from highest to lowest score;
# the sort is stable, so equal scores keep their existing relative order.
ranked_resumes = list(resume_scores.items())
ranked_resumes.sort(key=lambda entry: entry[1], reverse=True)

## Step 4: Ranking Resumes and Generating Explanations
Rank the resumes by their keyword-match score, highest first, and print a short explanation for each one: its rank, its score, the required skills it matched, and a general suggestion for improvement.

In [6]:
# Display ranked resumes with explanations
for i, (filename, score) in enumerate(ranked_resumes, 1):
    print(f"Rank {i}: {filename}")
    print(f"Score: {score}")
    # Lowercase the resume once per file rather than once per skill;
    # a missing text (None) is treated as empty so the report still prints.
    resume_lower = (resume_texts[filename] or "").lower()
    matched_skills = [skill for skill in job_criteria["skills"] if skill.lower() in resume_lower]
    print(f"Matched Skills: {', '.join(matched_skills)}")
    print("Suggested Improvements: Add relevant skills or experience if missing.")
    print("-" * 50)


Rank 1: 12011623.pdf
Score: 3
Matched Skills: Python, Machine Learning, Data Analysis
Suggested Improvements: Add relevant skills or experience if missing.
--------------------------------------------------
Rank 2: 12635195.pdf
Score: 2
Matched Skills: Python, Data Analysis
Suggested Improvements: Add relevant skills or experience if missing.
--------------------------------------------------
Rank 3: 18067556.pdf
Score: 2
Matched Skills: Python, Data Analysis
Suggested Improvements: Add relevant skills or experience if missing.
--------------------------------------------------
Rank 4: 19396040.pdf
Score: 2
Matched Skills: Python, Data Analysis
Suggested Improvements: Add relevant skills or experience if missing.
--------------------------------------------------
Rank 5: 50328713.pdf
Score: 2
Matched Skills: Python, Machine Learning
Suggested Improvements: Add relevant skills or experience if missing.
--------------------------------------------------
Rank 6: 10265057.pdf
Score: 1
Matc