# Resume to Job Description Matching Algorithm

In [3]:
%%capture
# Imports
import os
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, util
import json
import spacy

sentence_model = SentenceTransformer("jinaai/jina-embeddings-v3", trust_remote_code=True)

# Embedding Model 
# Computer Science (field of study)
# Software Engineer ()

# Settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [49]:
current_wd = os.getcwd()

job_desc_data = pd.read_csv(os.path.join(current_wd, "data", "job_descriptions_latest.csv"))
resume_data = pd.read_csv(os.path.join(current_wd, "data", "resume_extraction_samples_3.1.25.xlsx - Sheet1.csv"))

# Convert job_desc_json from strings to dict
job_desc_data["job_desc_parsed"] = job_desc_data["extracted"].apply(json.loads)

# Convert resume_json from strings to dict
resume_data["resume_parsed"] = resume_data["extracted"].apply(json.loads)

# Select just the JSON Columns
job_desc_json = job_desc_data['job_desc_parsed']

resume_json = resume_data['resume_parsed']

### All Functions Required Functions
- NLP Similarity 

In [5]:
def nlp_similarity(text1, text2): 
    """
    Uses spaCy to compute semantic similarity between two strings (0..1)
    """
    embedding1 = sentence_model.encode(text1, convert_to_tensor=True)
    embedding2 = sentence_model.encode(text2, convert_to_tensor=True)

    cosine_sim = util.cos_sim(embedding1, embedding2)

    return cosine_sim

### Update 03/02/2025 Cached NLP Similarity
from functools import lru_cache

@lru_cache(maxsize=1024) # define the amount of unique pairs will be cached
def get_embedding(text: str):
    return sentence_model.encode(text, convert_to_tensor=True)  # Assume nlp_model.encode returns an embedding vector

def cosine_similarity(vec1, vec2):
    return util.cos_sim(vec1, vec2)

def nlp_similarity_cached(text1, text2):
    emb1 = get_embedding(text1)
    emb2 = get_embedding(text2)
    return cosine_similarity(emb1, emb2)

In [55]:
resume_json[1]

{'skills': [{'skill': [['Java'], ['Python']], 'years': 4.25},
  {'skill': [['hands-on experience', 'cybersecurity tools'],
    ['implemented', 'cybersecurity tools']],
   'years': 0.5}],
 'education': [{'gpa': 0,
   'major': ['Cybersecurity'],
   'minor': [],
   'institution': 'CompTIA',
   'education_level': 'Vocational'},
  {'gpa': 0,
   'major': ['Cybersecurity'],
   'minor': [],
   'institution': 'University of Texas',
   'education_level': 'Vocational'},
  {'gpa': 0,
   'major': ['Digital Media'],
   'minor': [],
   'institution': 'Not specified',
   'education_level': "Associate's"}],
 'credentials': [{'credential': ['CompTIA Security+ Certification']}],
 'professional_background': [{'years': 4.25,
   'industry': ['Music'],
   'background': ['Music Producer'],
   'related_fields_of_study': ['Music Production']},
  {'years': 6.17,
   'industry': ['Delivery Services'],
   'background': ['Delivery Driver'],
   'related_fields_of_study': ['Logistics']},
  {'years': 0.58,
   'industry

----
### Education Matching Required Functions

In [None]:
import json

#############################
#### Education Matching #####
#############################
EDU_RANK = {
    "High School Diploma": 1,
    "Vocational": 1,               # or adjust rank if you want it above HS
    "Associate's": 2,
    "Current Bachelor's Student": 3,
    "Some Bachelor's": 3,
    "Bachelor’s": 4,
    "Bachelor's": 4,  # handle various apostrophe forms
    "Some Master's": 5,
    "Current Master's Student": 5,
    "Master’s": 6,
    "Master's": 6,    # handle apostrophe
    "PhD": 7,
    "Postdoctoral": 8
}

def extract_job_education_requirements(job_desc_json):
    """
    Extracts the mandatory and preferred education requirements.
    """
    mandatory_section = job_desc_json.get("mandatory", {})
    preferred_section = job_desc_json.get("preferred", {})
    mandatory_edu = mandatory_section.get("education", [])
    preferred_edu = preferred_section.get("education", [])
    return mandatory_edu, preferred_edu

def extract_resume_education(resume_json): 
    return resume_json.get("education", [])

def extract_professional_background(resume_json):
    return resume_json.get("professional_background", [])

def canidate_has_education_level(resume_education, required_rank): 
    """Checks if the candidate meets the required education level."""
    for edu_entry in resume_education: 
        candidate_level_str = edu_entry.get("education_level", "")
        candidate_rank = EDU_RANK.get(candidate_level_str, 0)
        print(f"[Level Check] Candidate Education: '{candidate_level_str}' (Rank: {candidate_rank}) vs Required Rank: {required_rank}")
        if candidate_rank >= required_rank: 
            print("=> Candidate meets the education level requirement.\n")
            return True
    print("=> Candidate does NOT meet the education level requirement.\n")
    return False

def get_equivalent_experience_score(resume_experience, related_field_of_study_list, threshold=0.6, min_years=4):
    """
    Computes a weighted average similarity score for experience-based matches, 
    comparing against 'related_fields_of_study' instead of 'field_of_study'.
    """
    print("\n========== Experience Matching ==========")
    
    # Filter out any "Related" fields.
    filtered_related_field_list = [field for field in related_field_of_study_list if field.lower() != "related"]
    
    total_relevant_years = 0.0
    weighted_sim_sum = 0.0
    
    for exp_entry in resume_experience:
        candidate_related_fields = exp_entry.get("related_fields_of_study", [])  # Use 'related_fields_of_study' 03/06
        job_titles = exp_entry.get("background", [])
        job_title_str = ", ".join(job_titles) if job_titles else "Unknown"
        years = exp_entry.get("years", 0)

        max_similarity = 0.0
        
        for candidate_field in candidate_related_fields:
            for req_field in filtered_related_field_list:
                sim_score = nlp_similarity_cached(candidate_field, req_field)
                print(f"[Experience] Job: '{job_title_str}' | Candidate Field: '{candidate_field}' vs Required: '{req_field}' => {sim_score.item()}")
                
                if sim_score.item() > max_similarity:
                    max_similarity = sim_score.item()
        
        if max_similarity >= threshold:
            weighted_sim_sum += max_similarity * years
            total_relevant_years += years
            print(f"=> Using max similarity {max_similarity} (Years: {years}) | Contribution: {max_similarity * years}")
            print(f"=> Total Relevant Years so far: {total_relevant_years}\n")

    if total_relevant_years >= min_years and total_relevant_years > 0:
        avg_score = weighted_sim_sum / total_relevant_years
        print(f"=> Total Relevant Experience: {total_relevant_years} (Min Required: {min_years})")
        print(f"=> Weighted Average Similarity Score from Experience: {avg_score}\n")
        return avg_score
    else:
        print(f"=> Total Relevant Experience: {total_relevant_years} (Min Required: {min_years}) -- Not enough experience.\n")
        return 0.0

def get_required_field_score(resume_education, resume_experience, required_fields, must_have_formal_degree, required_rank, threshold=0.6, min_years=4, ignore_threshold=False):
    """
    Computes an average similarity score for matching the candidate’s formal education
    against the required fields of study.
    """
    # Determine if "Related" is present (for using lower threshold).
    use_lower_threshold = any(field.lower() == "related" for field in required_fields)
    # Filter out "Related" from the comparisons.
    filtered_required_fields = [field for field in required_fields if field.lower() != "related"]
    
    print(f"\n========== Formal Education Matching ==========")
    print(f"Required Fields: {json.dumps(required_fields, indent=4, ensure_ascii=False)}")
    similarity_scores = []
    for edu_entry in resume_education:
        candidate_level_str = edu_entry.get("education_level", "")
        candidate_rank = EDU_RANK.get(candidate_level_str, 0)
        print(f"Candidate Education: Level='{candidate_level_str}', Majors={json.dumps(edu_entry.get('major', []), indent=4, ensure_ascii=False)}")
        if candidate_rank >= required_rank:
            for candidate_major in edu_entry.get("major", []):
                for required_field in filtered_required_fields:
                    sim_score = nlp_similarity_cached(candidate_major, required_field)
                    print(f"[Formal] Comparing Candidate Major '{candidate_major}' with Required Field '{required_field}' => {sim_score.item()}")
                    if not ignore_threshold:
                        effective_threshold = threshold if use_lower_threshold else 0.95
                        if sim_score >= effective_threshold:
                            similarity_scores.append(sim_score.item())
                    else:
                        similarity_scores.append(sim_score.item())
        else:
            continue
    if similarity_scores:
         avg_score = sum(similarity_scores) / len(similarity_scores)
         print(f"=> Average Similarity Score from Formal Education: {avg_score}\n")
         return avg_score
    else:
         print("=> No formal education match found; using experience fallback...\n")
         exp_score = get_equivalent_experience_score(resume_experience, required_fields, threshold=threshold, min_years=min_years)
         return exp_score

# ---------------------- Credentials Functions ---------------------- 
# Update 3/7/2025: Added credentials functions for matching job credentials.

def extract_job_credentials(job_desc_json):
    """
    Extracts the credentials specified in the job description.
    Returns a tuple: (mandatory_credentials, preferred_credentials)
    """
    mandatory_section = job_desc_json.get("mandatory", {})
    preferred_section = job_desc_json.get("preferred", {})
    mandatory_credentials = mandatory_section.get("credentials", [])
    preferred_credentials = preferred_section.get("credentials", [])
    print("Extracted Job Credentials:")
    print(f"  Mandatory: {mandatory_credentials}")
    print(f"  Preferred: {preferred_credentials}\n")
    return mandatory_credentials, preferred_credentials

def extract_resume_credentials(resume_json):
    """
    Extracts the candidate's credentials from the resume.
    """
    credentials = resume_json.get("credentials", [])
    print("Extracted Resume Credentials:")
    print(f"  {credentials}\n")
    return credentials

def match_credentials(required_credentials, resume_credentials):
    """
    Computes a match score for a list of required credentials using NLP similarity.
    """
    if not required_credentials:
        return None

    required_scores = []
    for req_cred_obj in required_credentials:
        for req_cred in req_cred_obj.get("credential", []):
            best_sim = 0.0
            for cred_obj in resume_credentials:
                candidate_creds = cred_obj.get("credential", [])
                for cand_cred in candidate_creds:
                    sim = nlp_similarity(req_cred, cand_cred).item()
                    if sim > best_sim:
                        best_sim = sim
            required_scores.append(best_sim)
            print(f"Best similarity for required credential '{req_cred}': {best_sim}")
    overall = sum(required_scores) / len(required_scores) if required_scores else 0.0
    print(f"Average similarity score for these credentials: {overall}\n")
    return overall

def calculate_mandatory_credentials_score(job_desc_json, resume_json):
    """
    Calculates the credentials match score for mandatory requirements.
    """
    job_mandatory, _ = extract_job_credentials(job_desc_json)
    resume_creds = extract_resume_credentials(resume_json)
    if not job_mandatory:
        print("No mandatory credentials specified.\n")
        return None
    score = match_credentials(job_mandatory, resume_creds)
    print(f"Mandatory Credentials Score: {score}\n")
    return score

def calculate_preferred_credentials_score(job_desc_json, resume_json):
    """
    Calculates the credentials match score for preferred requirements.
    """
    _, job_preferred = extract_job_credentials(job_desc_json)
    resume_creds = extract_resume_credentials(resume_json)
    if not job_preferred:
        print("No preferred credentials specified.\n")
        return None
    score = match_credentials(job_preferred, resume_creds)
    print(f"Preferred Credentials Score: {score}\n")
    return score

def calculate_overall_credentials_score(job_desc_json, resume_json):
    """
    Combines the mandatory and preferred credentials match scores into an overall score.
    """
    mand_score = calculate_mandatory_credentials_score(job_desc_json, resume_json)
    pref_score = calculate_preferred_credentials_score(job_desc_json, resume_json)
    
    if mand_score is None and pref_score is None:
        overall = 1.0
    elif mand_score is not None and pref_score is not None:
        overall = (mand_score + pref_score) / 2
    elif mand_score is not None:
        overall = mand_score
    else:
        overall = pref_score
    print(f"Overall Credentials Match Score: {overall}\n")
    return overall

# ------------------- End Credentials Functions ------------------- 

def meets_education_requirement(requirement_dict, resume_education, resume_experience, threshold=0.7, min_years=4, allow_fallback=False, job_desc_json=None, resume_json=None):
    """
    Computes a composite score for a single education requirement.
    If multiple education levels are specified, the final score is the average of each level's score.
    Additionally, if any required level contains "credential", check credentials.
    
    Update 3/7/2025: If a required level is "Or Experience" and is not in EDU_RANK, skip it.
    """
    print("\n========== Checking Single Education Requirement ==========")
    print("Job Requirement:")
    print(json.dumps(requirement_dict, indent=4, ensure_ascii=False))
    
    required_fields = requirement_dict.get('field_of_study', [])
    required_levels = requirement_dict.get('education_level', [])
    
    # Determine if a formal degree is mandatory.
    must_have_formal_degree = True
    for lvl in required_levels:
        if "or experience" in lvl.lower():
            must_have_formal_degree = False
            print("=> Job accepts equivalent experience in lieu of formal education.\n")
            break
    if allow_fallback:
        must_have_formal_degree = False

    max_required_rank = 0
    for lvl in required_levels:
        # Update 3/7/2025: If this level is "Or Experience" and is not found in EDU_RANK, skip it.
        if lvl.lower().strip() == "or experience" and EDU_RANK.get(lvl, 0) == 0:
            print(f"=> Skipping education level '{lvl}' because it is 'Or Experience' and not ranked.\n")
            continue
        candidate_rank_val = EDU_RANK.get(lvl, 0)
        if candidate_rank_val > max_required_rank:
            max_required_rank = candidate_rank_val
            print(f"=> Updated Required Education Rank to: {max_required_rank} based on level '{lvl}'\n")
    
    level_scores = []
    
    # Update 3/7/2025: Check for credential requirement.
    credential_required = any("credential" in lvl.lower() for lvl in required_levels)
    if credential_required:
        if job_desc_json is not None and resume_json is not None:
            print("=> Credential requirement detected. Checking candidate credentials...\n")  # Update 3/7/2025
            credential_score = calculate_overall_credentials_score(job_desc_json, resume_json)  # Update 3/7/2025
            print(f"=> Credential Score: {credential_score}\n")  # Update 3/7/2025
            level_scores.append(credential_score)  # Update 3/7/2025
        else:
            print("=> Credential requirement detected but job/resume JSON not provided for credentials matching.\n")  # Update 3/7/2025
            level_scores.append(0)  # Update 3/7/2025
    
    if required_fields:
        print(f"=> Required Field(s) of Study: {json.dumps(required_fields, indent=4, ensure_ascii=False)}\n")
        for lvl in required_levels:
            # Skip "Or Experience" levels that aren't in EDU_RANK.
            if lvl.lower().strip() == "or experience" and EDU_RANK.get(lvl, 0) == 0:
                continue  # Update 3/7/2025
            required_rank = EDU_RANK.get(lvl, 0)
            if must_have_formal_degree:
                formal_score = get_required_field_score(
                    resume_education, resume_experience, required_fields,
                    must_have_formal_degree, required_rank,
                    threshold=threshold, min_years=min_years, ignore_threshold=False)
            else:
                formal_score = get_required_field_score(
                    resume_education, resume_experience, required_fields,
                    must_have_formal_degree, required_rank,
                    threshold=threshold, min_years=min_years, ignore_threshold=True)
                exp_score = get_equivalent_experience_score(
                    resume_experience, required_fields,
                    threshold=threshold, min_years=min_years)
                print(f"=> Formal Education Score: {formal_score}")
                print(f"=> Experience Score: {exp_score}\n")
                if formal_score > 0 and exp_score > 0:
                    formal_score = (formal_score + exp_score) / 2
                else:
                    formal_score = formal_score or exp_score
            print(f"=> Score for level '{lvl}': {formal_score}\n")
            level_scores.append(formal_score)
    else:
        if must_have_formal_degree:
            level_scores.append(1.0 if canidate_has_education_level(resume_education, max_required_rank) else 0.0)
        else:
            level_scores.append(1.0 if canidate_has_education_level(resume_education, max_required_rank)
                                else get_equivalent_experience_score(resume_experience, ["Any"], threshold=threshold, min_years=min_years))
    overall_score = sum(level_scores) / len(level_scores) if level_scores else 0.0
    print(f"=> Final Composite Score for Requirement: {overall_score}\n")
    return overall_score

def calculate_mandatory_education_score(job_desc_json, resume_json, threshold=0.7, min_years=4):
    mandatory_edu_list, _ = extract_job_education_requirements(job_desc_json)
    resume_education = extract_resume_education(resume_json)
    resume_experience = extract_professional_background(resume_json)
    print("\n========== Mandatory Education Requirements ==========")
    print(json.dumps(mandatory_edu_list, indent=4, ensure_ascii=False))
    print("\n========== Candidate Education ==========")
    print(json.dumps(resume_education, indent=4, ensure_ascii=False))
    if not mandatory_edu_list:
        print("=> No mandatory education requirements specified.\n")
        return None
    mandatory_scores = []
    for requirement in mandatory_edu_list:
        print("\n--- Checking Mandatory Requirement ---")
        score = meets_education_requirement(requirement, resume_education, resume_experience, threshold=threshold, min_years=min_years, job_desc_json=job_desc_json, resume_json=resume_json)
        if score == 0:
            print("!!! Mandatory education requirement NOT met. Final Score: 0.0 !!!\n")
            return 0.0
        print(f"=> Mandatory Requirement Score: {score}\n")
        mandatory_scores.append(score)
    mandatory_avg = sum(mandatory_scores) / len(mandatory_scores)
    return mandatory_avg

def calculate_preferred_education_score(job_desc_json, resume_json, threshold=0.7, min_years=4):
    _, preferred_edu_list = extract_job_education_requirements(job_desc_json)
    resume_education = extract_resume_education(resume_json)
    resume_experience = extract_professional_background(resume_json)
    print("\n========== Preferred Education Requirements ==========")
    print(json.dumps(preferred_edu_list, indent=4, ensure_ascii=False))
    print("\n========== Candidate Education ==========")
    print(json.dumps(resume_education, indent=4, ensure_ascii=False))
    if not preferred_edu_list:
        print("=> No preferred education requirements specified.\n")
        return None
    preferred_scores = []
    for requirement in preferred_edu_list:
        print("\n--- Checking Preferred Requirement ---")
        print("Requirement:")
        print(json.dumps(requirement, indent=4, ensure_ascii=False))
        score = meets_education_requirement(requirement, resume_education, resume_experience, threshold=threshold, min_years=min_years, allow_fallback=True, job_desc_json=job_desc_json, resume_json=resume_json)
        print(f"=> Preferred Requirement Score: {score}\n")
        preferred_scores.append(score)
    preferred_avg = sum(preferred_scores) / len(preferred_scores)
    return preferred_avg

def calculate_education_match_score(job_desc_json, resume_json, threshold=0.7, min_years=4):
    print("========== Starting Education Match Score Calculation ==========\n")
    mandatory_score = calculate_mandatory_education_score(job_desc_json, resume_json, threshold, min_years)
    preferred_score = calculate_preferred_education_score(job_desc_json, resume_json, threshold, min_years)
    if mandatory_score is not None and preferred_score is not None:
        overall_score = (mandatory_score + preferred_score) / 2
    elif mandatory_score is not None:
        overall_score = mandatory_score
    elif preferred_score is not None:
        overall_score = preferred_score
    else:
        overall_score = 0
    print(f"\n========== Overall Education Match Score: {overall_score} ==========\n")
    return overall_score

In [54]:
calculate_education_match_score(resume_json=resume_json[3], job_desc_json=job_desc_json_test)



[
    {
        "field_of_study": [
            "Mathematics",
            "Statistics",
            "Computer Science",
            "Engineering",
            "Physics",
            "Economics",
            "Related"
        ],
        "education_level": [
            "Bachelor's"
        ]
    },
    {
        "field_of_study": [
            "Mathematics",
            "Statistics",
            "Computer Science",
            "Engineering",
            "Physics",
            "Economics",
            "Related"
        ],
        "education_level": [
            "Master's",
            "Or Credential"
        ]
    },
    {
        "field_of_study": [
            "Mathematics",
            "Statistics",
            "Computer Science",
            "Engineering",
            "Physics",
            "Economics",
            "Related"
        ],
        "education_level": [
            "PhD"
        ]
    },
    {
        "field_of_study": [
            "Mathematics",
            "Statisti

0.1667622928818067

----
#### Start Skill Matching Algorithm

In [10]:
def extract_job_mandatory_skills(job_description_json): 
    """
    Last Update: 03/01/2025
    Function expects a single JSON of a job description

    Returns: List of skills that are mandatory for the job listing
    """
    return(job_description_json.get("mandatory", {}).get('hard_skills', []))

def extract_job_preferred_skills(job_description_json): 
    """
    Last Update: 03/01/2025
    Function expects a single JSON of a job description

    Returns: List of skills that are preferred for the job listing
    """
    return(job_description_json.get("preferred", {}).get("hard_skills", []))

def extract_resume_skills(resume_json):
    return(resume_json.get("skills", [])) 

In [None]:
def compute_required_skill_similarity(candidate_skill, job_req_skill_value):
    """
    Given a candidate skill (which may be a string or a list of strings) and a 
    job-required skill value (a nested list of terms), compute a similarity score.
    
    For each inner list in job_req_skill_value (each representing a group that must be met together):
      - Normalize candidate_skill into a list of candidate terms.
      - For each candidate term, compute the similarity with every term in the group 
        and take the maximum similarity.
      - Average these maximum similarities (one per candidate term) to get a group score.
    
    If there is only one group, return its score. If there are multiple groups,
    return the maximum group score (i.e. the best match).
    
    Detailed print statements are provided to show the matching details.
    """
    # Normalize candidate_skill to a list of strings.
    if isinstance(candidate_skill, str):
        candidate_terms = [candidate_skill]
    elif isinstance(candidate_skill, list):
        candidate_terms = candidate_skill
    else:
        candidate_terms = []

    if not job_req_skill_value or not candidate_terms:
        return 0.0

    inner_avgs = []
    # Process each group in the job requirement.
    for group in job_req_skill_value:
        if not group:
            continue
        candidate_max_sims = []
        print(f"   Processing job group: {group}")
        for cand_term in candidate_terms:
            # Compute similarity of this candidate term with each term in the group.
            sims = []
            for req_term in group:
                sim = nlp_similarity_cached(cand_term, req_term).item()
                sims.append(sim)
                print(f"     --> Comparing candidate term '{cand_term}' with job term '{req_term}': {sim}")
            max_sim = max(sims) if sims else 0.0
            print(f"     --> Max similarity for candidate term '{cand_term}' vs group {group}: {max_sim}")
            candidate_max_sims.append(max_sim)
        # Average the best similarities for this group.
        group_avg = sum(candidate_max_sims) / len(candidate_max_sims) if candidate_max_sims else 0.0
        print(f"   --> Average similarity for candidate terms vs group {group}: {group_avg}")
        inner_avgs.append(group_avg)
    best_avg = max(inner_avgs) if inner_avgs else 0.0
    print(f"   --> Best average similarity among groups: {best_avg}")
    return best_avg

def calculate_skill_match_score(job_description_json, resume_json, skill_type='mandatory'):
    """
    Calculates an overall match score for skills (either mandatory or preferred).

    For each job-required skill (each requirement dictionary):
      - It prints the job requirement (the nested list of required terms) and the minimum years.
      - For each candidate skill (from the resume) that meets the minimum years requirement,
        it computes the similarity (using compute_required_skill_similarity) between
        the candidate's skill and the job requirement.
      - It prints out the candidate skill, its years, and the computed similarity.
      - It then takes the highest similarity score among all candidate skills for that requirement.
    
    Finally, the overall score is computed as the average of these best-match scores
    (one for each job-required skill).
    """
    if skill_type == 'mandatory':
        job_skills = extract_job_mandatory_skills(job_description_json)
    else:
        job_skills = extract_job_preferred_skills(job_description_json)
        
    resume_skills = extract_resume_skills(resume_json)
    requirement_scores = []
    
    for req in job_skills:
        # Extract the required skill value (a nested list) and the minimum years.
        job_req_skill_value = req.get('skill', [])
        min_years_required = req.get('minyears', [0])[0]
        
        print("----------------------------------------------------")
        print("Processing Job Skill Requirement:")
        print(f"  Job Requirement: {job_req_skill_value}")
        print(f"  Minimum Years Required: {min_years_required}")
        
        best_match = 0.0  # best similarity for this particular job requirement
        best_candidate_skill = None
        
        # Iterate over each candidate skill entry.
        for candidate in resume_skills:
            candidate_years = candidate.get('years', 0)
            # Only consider candidate skills that meet the minimum years requirement.
            if candidate_years >= min_years_required:
                # Each candidate's "skill" field may itself be a nested list.
                for candidate_skill in candidate.get('skill', []):
                    sim = compute_required_skill_similarity(candidate_skill, job_req_skill_value)
                    print(f"   Candidate Skill: {candidate_skill} (Years: {candidate_years}) -> Similarity: {sim}")
                    if sim > best_match:
                        best_match = sim
                        best_candidate_skill = candidate_skill
        print(f"  Best match for requirement {job_req_skill_value}: {best_match} from candidate skill: {best_candidate_skill}")
        requirement_scores.append(best_match)
    
    overall_score = sum(requirement_scores) / len(requirement_scores) if requirement_scores else 0.0
    print("----------------------------------------------------")
    print(f"Overall Skill Match Score for '{skill_type}': {overall_score}")
    return overall_score

# Example usage:
# (Assuming extract_job_mandatory_skills, extract_job_preferred_skills, and extract_resume_skills are defined as in your data.)
mandatory_score = calculate_skill_match_score(job_description_json=job_desc_json[0], resume_json=resume_json[1], skill_type='mandatory')
preferred_score = calculate_skill_match_score(job_description_json=job_desc_json[0], resume_json=resume_json[1], skill_type='preferred')

print("Mandatory Skills Score:", mandatory_score)
print("Preferred Skills Score:", preferred_score)

----------------------------------------------------
Processing Job Skill Requirement:
  Job Requirement: [['mastery of core programming languages'], ['Java']]
  Minimum Years Required: 0
   Processing job group: ['mastery of core programming languages']
     --> Comparing candidate term 'Java' with job term 'mastery of core programming languages': 0.5581164360046387
     --> Max similarity for candidate term 'Java' vs group ['mastery of core programming languages']: 0.5581164360046387
   --> Average similarity for candidate terms vs group ['mastery of core programming languages']: 0.5581164360046387
   Processing job group: ['Java']
     --> Comparing candidate term 'Java' with job term 'Java': 1.0
     --> Max similarity for candidate term 'Java' vs group ['Java']: 1.0
   --> Average similarity for candidate terms vs group ['Java']: 1.0
   --> Best average similarity among groups: 1.0
   Candidate Skill: ['Java'] (Years: 4.25) -> Similarity: 1.0
   Processing job group: ['mastery of 

--- 
### Creditials Matching Algo

In [31]:
calculate_overall_credentials_score(job_desc_json=job_desc_json[1], resume_json=resume_json[1])

Extracted Job Credentials:
  Mandatory: [{'credential': ['Active SECRET clearance']}]
  Preferred: [{'credential': ['Agile Developer certification', 'Certified Scrum Developer certification']}]

Extracted Resume Credentials:
  [{'credential': ['CompTIA Security+ Certification']}]

Best similarity for required credential 'Active SECRET clearance': 0.4891560673713684
Average similarity score for these credentials: 0.4891560673713684

Mandatory Credentials Score: 0.4891560673713684

Extracted Job Credentials:
  Mandatory: [{'credential': ['Active SECRET clearance']}]
  Preferred: [{'credential': ['Agile Developer certification', 'Certified Scrum Developer certification']}]

Extracted Resume Credentials:
  [{'credential': ['CompTIA Security+ Certification']}]

Best similarity for required credential 'Agile Developer certification': 0.5511242747306824
Best similarity for required credential 'Certified Scrum Developer certification': 0.5735363960266113
Average similarity score for these cred

0.5257432013750076

##### Responsibilities

In [39]:
job_desc_json[1]

{'details': {'job_title': ['Digital Engineer II'],
  'job_title_base': ['Digital Engineer'],
  'company_name': ['PMAT'],
  'industry': ['Defense', 'Information Technology'],
  'employment_type': ['Full-time'],
  'wage': [],
  'location': [{'city': 'San Diego', 'state': 'CA', 'country': 'US'}],
  'wfh_policy': [],
  'travel_required': {'required': False, 'hours_weekly': 0},
  'benefits': {'medical': False,
   'dental': False,
   'vision': False,
   'mental_health': False,
   'hsa': False,
   'fsa': False,
   '401k_match': False,
   'equity': False,
   'unlimited_pto': False,
   'tuition_reimbursement': False,
   'bonus': False,
   'other': []},
  'company_stage': [],
  'tax_terms': ['Direct-hire'],
  'experience_level': ['Associate'],
  'work_schedule': [],
  'work_authorization': ['Active SECRET clearance']},
 'mandatory': {'hard_skills': [{'skill': [['applying programming concepts',
      'professional setting'],
     ['applying programming concepts', 'academic setting']],
    'minyea