In [17]:
import os
import re
import pandas as pd
from docx import Document

# -------- CLEAN TEXT --------
def clean_text(text):
    """Normalize *text* for skill matching.

    The value is converted with ``str()``, lower-cased, stripped of every
    character other than ``a-z``, ``0-9``, ``+``, ``#`` and space (each
    removed character becomes a space), and runs of whitespace are then
    collapsed to a single space.

    ``+`` and ``#`` are deliberately preserved: removing them turns skill
    names such as "c++" or "c#" into a bare "c", which makes them
    impossible to recognise afterwards.

    Args:
        text: Any object; it is passed through ``str()`` first.

    Returns:
        The normalized string with no leading or trailing whitespace.
    """
    text = str(text).lower()
    # Punctuation, newlines and tabs all become spaces here ...
    text = re.sub(r'[^a-z0-9+# ]', ' ', text)
    # ... and the resulting runs of spaces are squeezed to one.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# -------- SKILL EXTRACTION --------
def extract_skills(text, skills_list):
    """Return the entries of *skills_list* that occur in *text*.

    Matching is case-insensitive and whole-token: a skill only counts when
    it is not directly preceded or followed by a word character, so "java"
    is not reported for a text that only contains "javascript".

    Args:
        text: The text to search.
        skills_list: Iterable of skill names (plain strings, not regexes).

    Returns:
        A list of the matching skills, in the order they appear in
        *skills_list*.
    """
    # Lookarounds are used instead of a regex word boundary because a word
    # boundary requires a word character on one side of it; a skill ending
    # in punctuation (e.g. "c++", "c#") followed by a space or the end of
    # the text would therefore never match.
    return [
        skill
        for skill in skills_list
        if re.search(rf"(?<!\w){re.escape(skill)}(?!\w)", text, re.IGNORECASE)
    ]

# -------- JOB DESCRIPTION --------
# Vocabulary of skills the matcher is able to recognise.
job_skills_list = [
    "python",
    "java",
    "c++",
    "html",
    "css",
    "javascript",
    "machine learning",
    "data analysis",
    "numpy",
    "pandas",
    "software engineering",
    "sql",
    "deep learning",
]

# Free-text job posting; the required skills are derived from it below.
job_description = (
    "\n"
    "Looking for a candidate with skills in Python, HTML, CSS,\n"
    "software engineering, data analysis, numpy, and machine learning.\n"
)

# Normalize the posting, then keep the known skills that it mentions.
job_description_cleaned = clean_text(job_description)
job_skills_required = extract_skills(
    job_description_cleaned,
    job_skills_list,
)

print("Required skills:", job_skills_required)

# -------- READ ALL RESUMES & CHECK SKILLS --------
# Name of the CSV that the export cell writes with `df_results.to_csv(...)`.
# It is saved into the same folder that is scanned here and is full of skill
# names, so on a re-run it would otherwise be scored as if it were a CV.
RESULTS_FILENAME = "resume_matching_results.csv"

# Set for constant-time membership tests inside the loop.
required_skills = set(job_skills_required)

# One dict per CV that matched at least one required skill.
results = []

for file in os.listdir():
    # Skip our own output file and Word's temporary "~$..." owner files,
    # neither of which is a resume.
    if file == RESULTS_FILENAME or file.startswith("~$"):
        continue

    lower_name = file.lower()
    file_path = os.path.join(os.getcwd(), file)
    text = ""

    # WORD FILES
    if lower_name.endswith(".docx"):
        try:
            doc = Document(file_path)
            text = " ".join(p.text for p in doc.paragraphs)
        except Exception as exc:
            # Best effort: an unreadable file is reported and skipped rather
            # than aborting the run. `Exception` (not a bare `except`) lets
            # KeyboardInterrupt / SystemExit propagate.
            print(f"Skipping {file}: {exc}")
            continue

    # CSV FILES
    elif lower_name.endswith(".csv"):
        try:
            df = pd.read_csv(file_path)
            # Flatten every cell into one string so the whole sheet is searched.
            text = " ".join(df.astype(str).values.flatten())
        except Exception as exc:
            print(f"Skipping {file}: {exc}")
            continue

    # Files of other types, and files with no extractable text, are ignored.
    if not text:
        continue

    text_cleaned = clean_text(text)
    skills_found = extract_skills(text_cleaned, job_skills_list)

    # Keep only the skills the job actually asks for.
    matched_skills = [skill for skill in skills_found if skill in required_skills]
    if not matched_skills:
        continue

    # matched_skills is non-empty here, so job_skills_required is too and the
    # division cannot hit zero.
    match_percent = round((len(matched_skills) / len(job_skills_required)) * 100, 2)
    results.append({
        "CV File": file,
        "Matched Skills": ", ".join(matched_skills),
        "Skill Match %": match_percent
    })

# -------- SHOW RESULTS --------
# Nothing matched: say so and skip building the table.
if not results:
    print("No resumes contain the required skills.")
else:
    # Best match first, with a fresh 0..n-1 index after sorting.
    df_results = (
        pd.DataFrame(results)
        .sort_values(by="Skill Match %", ascending=False)
        .reset_index(drop=True)
    )
    # Rank is simply the 1-based position in the sorted table.
    row_count = len(df_results)
    df_results["Rank"] = range(1, row_count + 1)

    print("\n===== Resume Skills Match Results =====")
    print(df_results)


Required skills: ['python', 'html', 'css', 'machine learning', 'data analysis', 'numpy', 'software engineering']

===== Resume Skills Match Results =====
             CV File                                     Matched Skills  \
0  full_cv_demo.docx  python, html, css, machine learning, data anal...   
1    Resume 2-2.docx                           python, html, css, numpy   
2          john.docx                           python, html, css, numpy   
3      Resume 2.docx                   css, numpy, software engineering   
4            amy.csv                           python, machine learning   
5          mark.docx                                                css   

   Skill Match %  Rank  
0         100.00     1  
1          57.14     2  
2          57.14     3  
3          42.86     4  
4          28.57     5  
5          14.29     6  


In [None]:
# `df_results` is only created when at least one resume matched, so export
# only in that case. An unconditional `to_csv` would raise NameError on an
# empty run, or silently write a stale table left over from an earlier run.
if results:
    df_results.to_csv('resume_matching_results.csv', index=False)
    print('Results saved to resume_matching_results.csv')
else:
    print('No results to save.')