In [2]:
%%capture
import os
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, util
import json
import spacy
from functools import lru_cache
import torch
from utils.semantic_similarity import nlp_similarity_cached # import first
from utils.semantic_similarity import sentence_model # import second

# Base directory used below to locate the data files: the kernel's current working directory.
current_wd = os.getcwd()

def safe_json_loads(val):
    """
    Decode a JSON document, returning None instead of raising on bad input.

    Parameters:
      val: The value to decode. Normally a JSON string; non-string values
           (e.g. None or a float NaN coming from an empty spreadsheet cell)
           are treated as unparseable.

    Returns:
      The decoded Python object, or None when `val` cannot be decoded.
    """
    try:
        return json.loads(val)
    # json.loads raises TypeError (not JSONDecodeError) for inputs that are not
    # str/bytes/bytearray, so both must be caught to keep a column-wide apply alive.
    except (json.JSONDecodeError, TypeError):
        print(f"Error parsing JSON: {val}")
        return None

# Load the job-description and resume extraction spreadsheets from <cwd>/data.
job_desc_data = pd.read_excel(os.path.join(current_wd, "data", "sample_des_extractions_test_final_3.22.25.xlsx"))
resume_data = pd.read_excel(os.path.join(current_wd, "data", "sample_res_extractions_final_3.18.25.xlsx"))

# Decode the "extracted" column of each sheet with safe_json_loads;
# entries that fail to decode become None.
job_desc_data["job_desc_parsed"] = job_desc_data["extracted"].apply(safe_json_loads)
resume_data["resume_parsed"] = resume_data["extracted"].apply(safe_json_loads)

# Series of decoded objects that the later cells index into and iterate over.
job_desc_json = job_desc_data['job_desc_parsed']
resume_json = resume_data['resume_parsed']

def append_job_id_to_job_desc(row):
    """
    Stamp a row's job id onto its parsed job description.

    Reads `row['id']` and `row['job_desc_parsed']`. A string description is
    decoded with `safe_json_loads` first. The description object is modified
    in place (a 'job_id' key is written into it) and that same object is
    returned.

    Returns:
      The description with 'job_id' set, or None when the description is
      missing or cannot be decoded.
    """
    job_id, description = row['id'], row['job_desc_parsed']

    # A string here means the JSON has not been decoded yet.
    if isinstance(description, str):
        description = safe_json_loads(description)

    # Covers both "nothing stored" and "decoding failed".
    if description is None:
        return None

    description['job_id'] = job_id
    return description

# Row-wise pass that writes each row's 'id' into its parsed description as 'job_id'.
# The helper mutates the dicts held in 'job_desc_parsed' in place, so the
# job_desc_json Series defined above exposes 'job_id' as well.
job_desc_data['job_desc_json'] = job_desc_data.apply(append_job_id_to_job_desc, axis=1)

In [3]:
def extract_all_strings_from_json(j):
    """
    Recursively extracts all non-empty strings from a JSON-like object.

    Strings are collected wherever they appear: as list items, as dict values,
    or as the object itself. Each string is stripped of surrounding whitespace
    and dropped if nothing remains. Dict keys are not collected. Values that
    are neither str, dict nor list (numbers, booleans, None) contribute nothing.

    Parameters:
      j: A decoded JSON value (dict, list, str, or scalar).

    Returns:
      list[str]: The stripped strings in traversal order (duplicates kept).
    """
    if isinstance(j, str):
        text = j.strip()
        return [text] if text else []

    if isinstance(j, dict):
        children = j.values()
    elif isinstance(j, list):
        children = j
    else:
        # Numbers, booleans, None and anything else carry no text.
        return []

    result = []
    for child in children:
        result.extend(extract_all_strings_from_json(child))
    return result

# Precompute Embeddings from a list of JSONS
def precompute_embeddings_for_json_list(json_list, batch_size=64):
    """
    Build a string -> embedding lookup for every distinct string found in a
    collection of JSON objects.

    Each non-None entry is passed through `extract_all_strings_from_json`; the
    distinct strings are then encoded in one call to `sentence_model.encode`.

    Parameters:
      json_list (iterable): JSON objects (dicts); None entries are skipped.
      batch_size (int): Batch size forwarded to the encoder.

    Returns:
      dict: Maps each distinct string to its embedding (empty when no strings
      were found).
    """
    unique_strings = set()
    for doc in json_list:
        if doc is not None:
            unique_strings.update(extract_all_strings_from_json(doc))

    # Freeze an ordering so strings and embedding rows can be paired up.
    ordered_strings = list(unique_strings)

    if not ordered_strings:
        vectors = np.zeros((0, sentence_model.get_sentence_embedding_dimension()))
    else:
        vectors = sentence_model.encode(
            ordered_strings,
            batch_size=batch_size,
            convert_to_tensor=False,
        )

    return dict(zip(ordered_strings, vectors))

# Precompute embeddings for every distinct string found in the parsed job descriptions.
embeddings = precompute_embeddings_for_json_list(job_desc_json, batch_size=64)

In [3]:
import json
from utils.safe_averages import safe_average

# ---------------------------
# FINAL OVERALL MATCH SCORE
# ---------------------------
def calculate_overall_match_score(job_json, resume_json,
                                  skill_weight=0.20,
                                  education_weight=0.20,
                                  responsibilities_weight=0.20,
                                  credentials_weight=0.20,
                                  background_weight=0.20):
    """
    Calculates the final overall match score as a safe average of the available section scores.
    If a section has no requirements, its score is returned as None and is ignored in the overall average.
    Sections include:
      - Skills
      - Education
      - Responsibilities
      - Credentials
      - Professional Background (the combined background/industry score)

    Parameters:
      job_json (dict): Parsed job description. Its identifier is read from the
        "id" key, falling back to "job_id" when "id" is absent.
      resume_json (dict): Parsed resume.
      skill_weight, education_weight, responsibilities_weight,
      credentials_weight, background_weight: Accepted for interface
        compatibility; this implementation does not use them — the overall
        score is the unweighted `safe_average` of the non-None section scores.

    Returns:
      dict: "job_id", one entry per section (the section's result dict plus
      "job_id"), and "overall_match_score".
    """
    skill_dict = calculate_overall_skill_match_score(job_json, resume_json)
    edu_dict = calculate_overall_education_match_score(job_json, resume_json)
    resp_dict = calculate_overall_responsibilities_match_score(job_json, resume_json)
    cred_dict = calculate_overall_credentials_score(job_json, resume_json)
    bg_dict = calculate_overall_background_score(job_json, resume_json)

    # (section result, key holding that section's overall score)
    section_results = (
        (skill_dict, "overall_skill_score"),
        (edu_dict, "overall_education_score"),
        (resp_dict, "overall_responsibilities_score"),
        (cred_dict, "overall_credentials_score"),
        (bg_dict, "overall_professional_background_score"),
    )
    # Sections without requirements report None and are left out of the average.
    sections = [
        section[score_key]
        for section, score_key in section_results
        if section[score_key] is not None
    ]

    overall_match = safe_average(sections)

    # The rest of this notebook stores the identifier under "job_id"
    # (see append_job_id_to_job_desc); keep "id" first for backward compatibility.
    job_id = job_json.get("id")
    if job_id is None:
        job_id = job_json.get("job_id")

    final_result = {
        "job_id": job_id,
        "skills": {**skill_dict, "job_id": job_id},
        "education": {**edu_dict, "job_id": job_id},
        "responsibilities": {**resp_dict, "job_id": job_id},
        "credentials": {**cred_dict, "job_id": job_id},
        "professional_background": {**bg_dict, "job_id": job_id},
        "overall_match_score": overall_match
    }

    print(f"\nFinal Overall Match Score: {overall_match}")
    return final_result

# # ---------------------------
# # Example Usage:
# # (Assuming job_desc_json and resume_json are your lists/Series of job and resume JSON objects)
# result = calculate_overall_match_score(job_json=job_desc_json[47], resume_json=resume_json[2])
# print(result)

In [4]:
def calculate_scores_for_all_jobs(job_desc_list,
                                  resume_json,
                                  skill_weight=0.25,
                                  education_weight=0.25,
                                  responsibilities_weight=0.25,
                                  credentials_weight=0.25,
                                  skill_mandatory_weight=0.5,
                                  skill_preferred_weight=0.5,
                                  education_threshold=0.7,
                                  education_min_years=4,
                                  education_mandatory_weight=0.5,
                                  education_preferred_weight=0.5,
                                  credentials_mandatory_weight=0.5,
                                  credentials_preferred_weight=0.5):
    """
    Iterates over each job description in `job_desc_list`. For each entry, calls
    `calculate_overall_match_score` and returns a dictionary of sub-scores plus
    the overall match score, keyed by the index.

    Parameters:
      - job_desc_list: A list of job description JSON objects.
      - resume_json: A single JSON object for the candidate's resume
        (or adapt to pick different resumes if needed).
      - skill_weight, education_weight, responsibilities_weight,
        credentials_weight: Forwarded to `calculate_overall_match_score`.
      - skill_mandatory_weight, skill_preferred_weight, education_threshold,
        education_min_years, education_mandatory_weight,
        education_preferred_weight, credentials_mandatory_weight,
        credentials_preferred_weight: Kept so existing callers do not break,
        but NOT forwarded: `calculate_overall_match_score` as defined in this
        notebook has no such parameters and would raise TypeError.

    Returns:
      A dictionary where each key is the position of the job in `job_desc_list` and
      each value is the dictionary returned by `calculate_overall_match_score`.
    """
    results_dict = {}

    # enumerate() walks the entries positionally, so this also works for
    # sequences that cannot be indexed with 0..len-1.
    for i, job_json in enumerate(job_desc_list):
        results_dict[i] = calculate_overall_match_score(
            job_json=job_json,
            resume_json=resume_json,
            skill_weight=skill_weight,
            education_weight=education_weight,
            responsibilities_weight=responsibilities_weight,
            credentials_weight=credentials_weight,
        )

    return results_dict

#### Skill Matching

In [4]:
# SECTION 1: SKILLS MATCHING
from utils.mandatory_skill_score import calculate_mandatory_skill_scores
from utils.preferred_skill_score import calculate_preferred_skill_scores

# Score every job description against the resume at index 7 of resume_json,
# separately for mandatory and preferred skills.
mandatory_skill_scores = calculate_mandatory_skill_scores(job_json_list=job_desc_json, resume_json=resume_json[7])
preferred_skill_scores = calculate_preferred_skill_scores(job_json_list=job_desc_json, resume_json=resume_json[7])

# Dump both result objects for inspection.
print(mandatory_skill_scores)
print(preferred_skill_scores)

{'cc807c45-4a2f-4335-90d5-b50669459cc5': {'mandatory_skill_score': 0.6753497163454694}, '1400a854-77d4-4aab-a84c-9a643f100657': {'mandatory_skill_score': 0.6502660989761354}, '72f7a93e-824e-485f-b356-58c523fe65d0': {'mandatory_skill_score': 0.6075126628081006}, 'ba72461b-56f5-4c1b-90b0-3c3b9ffa8a1c': {'mandatory_skill_score': 0.8323876776575616}, 'a713d178-a7dc-4f03-b569-9c7deb76bff3': {'mandatory_skill_score': 0.8692123757468332}, '428eda01-4bb2-44c7-9281-9a5c43f92e3a': {'mandatory_skill_score': 0.6948758999506633}, '5c89be22-d4da-483f-815a-fbc31e6e14dd': {'mandatory_skill_score': 0.7269078758027823}, '8312a3ee-11fa-48a0-946a-a24ad3dc4e7e': {'mandatory_skill_score': 0.544359400868416}, 'f7842b4d-9109-4ce1-b506-d36989207b87': {'mandatory_skill_score': 0.6125117142995201}, 'c8c0c00a-4efd-4b02-b681-fa7d6280d827': {'mandatory_skill_score': 0.371232161919276}, '63af5a5c-5d46-4c8d-b778-ffa195ab4fac': {'mandatory_skill_score': None}, '91ff5695-564e-4e0c-aad2-f244ab0ed707': {'mandatory_skill_

---- 
#### Responsibilities Matching

In [5]:
from utils.responsibilities_match_score import calculate_responsibilities_scores

# Responsibilities scores for every job description against the resume at index 7.
responsibilities_match_scores = calculate_responsibilities_scores(job_json_list=job_desc_json, resume_json=resume_json[7])
print(responsibilities_match_scores)

{'cc807c45-4a2f-4335-90d5-b50669459cc5': {'responsibilities_score': 0.6316065947816832}, '1400a854-77d4-4aab-a84c-9a643f100657': {'responsibilities_score': 0.6705974141756695}, '72f7a93e-824e-485f-b356-58c523fe65d0': {'responsibilities_score': 0.6655583795573977}, 'ba72461b-56f5-4c1b-90b0-3c3b9ffa8a1c': {'responsibilities_score': 0.6294076357569015}, 'a713d178-a7dc-4f03-b569-9c7deb76bff3': {'responsibilities_score': 0.7857423718013462}, '428eda01-4bb2-44c7-9281-9a5c43f92e3a': {'responsibilities_score': 0.691143016020457}, '5c89be22-d4da-483f-815a-fbc31e6e14dd': {'responsibilities_score': 0.5922732103018116}, '8312a3ee-11fa-48a0-946a-a24ad3dc4e7e': {'responsibilities_score': 0.6399371458424464}, 'f7842b4d-9109-4ce1-b506-d36989207b87': {'responsibilities_score': 0.5979085108181379}, 'c8c0c00a-4efd-4b02-b681-fa7d6280d827': {'responsibilities_score': 0.7216731434067091}, '63af5a5c-5d46-4c8d-b778-ffa195ab4fac': {'responsibilities_score': 0.6696811914443972}, '91ff5695-564e-4e0c-aad2-f244ab0

----- 
#### Education Matching

In [6]:
from utils.mandatory_education_score import calculate_mandatory_education_scores
from utils.preferred_education_score import calculate_preferred_education_scores

# Mandatory and preferred education scores for every job description against the resume at index 7.
mandatory_education_scores = calculate_mandatory_education_scores(job_json_list=job_desc_json, resume_json=resume_json[7])
preferred_education_scores = calculate_preferred_education_scores(job_json_list=job_desc_json, resume_json=resume_json[7])

# NOTE(review): the recorded output shows some job ids mapped to a bare 0.0 and
# others to {'mandatory_education_score': ...}; confirm the helper's return shape.
print(mandatory_education_scores)
print(preferred_education_scores)

{'cc807c45-4a2f-4335-90d5-b50669459cc5': 0.0, '1400a854-77d4-4aab-a84c-9a643f100657': 0.0, '72f7a93e-824e-485f-b356-58c523fe65d0': 0.0, 'ba72461b-56f5-4c1b-90b0-3c3b9ffa8a1c': 0.0, 'a713d178-a7dc-4f03-b569-9c7deb76bff3': 0.0, '428eda01-4bb2-44c7-9281-9a5c43f92e3a': {'mandatory_education_score': 1.0}, '5c89be22-d4da-483f-815a-fbc31e6e14dd': {'mandatory_education_score': 1.0}, '8312a3ee-11fa-48a0-946a-a24ad3dc4e7e': 0.0, 'f7842b4d-9109-4ce1-b506-d36989207b87': {'mandatory_education_score': None}, 'c8c0c00a-4efd-4b02-b681-fa7d6280d827': {'mandatory_education_score': None}, '63af5a5c-5d46-4c8d-b778-ffa195ab4fac': {'mandatory_education_score': None}, '91ff5695-564e-4e0c-aad2-f244ab0ed707': {'mandatory_education_score': None}, 'abf21387-accd-4314-8f2b-eb03a8de6483': 0.0, '11fb3b10-360e-4d3f-9048-443356ae0dcd': {'mandatory_education_score': None}, '446eb377-d4b8-4362-8a9f-3c5bc27e9a1e': 0.0, 'dbf6ef88-8fc8-48d8-8d3b-1887e8ecadf2': {'mandatory_education_score': None}, '2caf9581-cb9c-447f-b2c0-

----- 
#### Credentials Matching

In [7]:
# Inspect the mandatory credentials listed for one job description.
# NOTE(review): a bare "8" was left here, presumably a previously inspected index — confirm or remove.
job_num = 46

# Missing 'mandatory' / 'credentials' keys fall back to an empty result.
print(job_desc_json[job_num].get('mandatory', {}).get('credentials', []))
# print(job_desc_json[job_num].get('preferred', {}).get('credentials', []))

[]


In [8]:
# Display the parsed resume at index 3 for manual inspection.
resume_json[3]

{'skills': [{'skill': [['Software Development Lifecycle']], 'years': 10.83},
  {'skill': [['Advanced Java Programming']], 'years': 10.83},
  {'skill': [['Frontend Development']], 'years': 10.83},
  {'skill': [['Spring Framework Expertise']], 'years': 10.83},
  {'skill': [['Container Orchestration']], 'years': 10.83},
  {'skill': [['Microservices Architecture']], 'years': 10.83},
  {'skill': [['RESTful API Development']], 'years': 10.83},
  {'skill': [['Cloud Computing']], 'years': 10.83},
  {'skill': [['CI/CD Pipeline Coordination']], 'years': 10.83},
  {'skill': [['Database Management']], 'years': 10.83},
  {'skill': [['Build Automation']], 'years': 10.83},
  {'skill': [['Development Efficiency']], 'years': 10.83},
  {'skill': [['Application Hosting']], 'years': 10.83},
  {'skill': [['Real-time Data Processing']], 'years': 2.5},
  {'skill': [['NoSQL Database Operations']], 'years': 2.5},
  {'skill': [['Version Control']], 'years': 10.83},
  {'skill': [['Project Tracking']], 'years': 2

In [9]:
from utils.mandatory_credentials_score import calculate_mandatory_credentials_scores
from utils.preferred_credentials_score import calculate_preferred_credentials_scores

# Mandatory and preferred credentials scores for every job description against the resume at index 7.
mandatory_credentials_scores = calculate_mandatory_credentials_scores(job_json_list=job_desc_json, resume_json=resume_json[7])
preferred_credentials_scores = calculate_preferred_credentials_scores(job_json_list=job_desc_json, resume_json=resume_json[7])

# Dump both result objects for inspection.
print(mandatory_credentials_scores)
print(preferred_credentials_scores)

{'cc807c45-4a2f-4335-90d5-b50669459cc5': {'mandatory_credentials_score': None}, '1400a854-77d4-4aab-a84c-9a643f100657': {'mandatory_credentials_score': None}, '72f7a93e-824e-485f-b356-58c523fe65d0': {'mandatory_credentials_score': None}, 'ba72461b-56f5-4c1b-90b0-3c3b9ffa8a1c': {'mandatory_credentials_score': None}, 'a713d178-a7dc-4f03-b569-9c7deb76bff3': {'mandatory_credentials_score': None}, '428eda01-4bb2-44c7-9281-9a5c43f92e3a': {'mandatory_credentials_score': None}, '5c89be22-d4da-483f-815a-fbc31e6e14dd': {'mandatory_credentials_score': None}, '8312a3ee-11fa-48a0-946a-a24ad3dc4e7e': {'mandatory_credentials_score': None}, 'f7842b4d-9109-4ce1-b506-d36989207b87': {'mandatory_credentials_score': None}, 'c8c0c00a-4efd-4b02-b681-fa7d6280d827': {'mandatory_credentials_score': None}, '63af5a5c-5d46-4c8d-b778-ffa195ab4fac': {'mandatory_credentials_score': None}, '91ff5695-564e-4e0c-aad2-f244ab0ed707': {'mandatory_credentials_score': None}, 'abf21387-accd-4314-8f2b-eb03a8de6483': {'mandatory

----- 
#### Background Matching

In [10]:
from utils.mandatory_background_score import calculate_mandatory_background_scores
from utils.preferred_background_score import calculate_preferred_background_scores

# Mandatory and preferred background scores for every job description against the resume at index 7.
mandatory_background_scores = calculate_mandatory_background_scores(job_json_list=job_desc_json, resume_json=resume_json[7])
preferred_background_scores = calculate_preferred_background_scores(job_json_list=job_desc_json, resume_json=resume_json[7])

# Dump both result objects for inspection.
print(mandatory_background_scores)
print(preferred_background_scores)


{'cc807c45-4a2f-4335-90d5-b50669459cc5': {'mandatory_background_score': 0.0, 'mandatory_industry_score': None}, '1400a854-77d4-4aab-a84c-9a643f100657': {'mandatory_background_score': 0.0, 'mandatory_industry_score': None}, '72f7a93e-824e-485f-b356-58c523fe65d0': {'mandatory_background_score': 0.0, 'mandatory_industry_score': 0.0}, 'ba72461b-56f5-4c1b-90b0-3c3b9ffa8a1c': {'mandatory_background_score': 0.2185063564777375, 'mandatory_industry_score': None}, 'a713d178-a7dc-4f03-b569-9c7deb76bff3': {'mandatory_background_score': None, 'mandatory_industry_score': None}, '428eda01-4bb2-44c7-9281-9a5c43f92e3a': {'mandatory_background_score': 1.0, 'mandatory_industry_score': None}, '5c89be22-d4da-483f-815a-fbc31e6e14dd': {'mandatory_background_score': 0.0, 'mandatory_industry_score': None}, '8312a3ee-11fa-48a0-946a-a24ad3dc4e7e': {'mandatory_background_score': None, 'mandatory_industry_score': None}, 'f7842b4d-9109-4ce1-b506-d36989207b87': {'mandatory_background_score': 0.0, 'mandatory_industry

---- 

#### Merge Scores into One Object

In [18]:
# Combine the per-section score objects computed above into a single object.
# NOTE(review): merge_scores_by_job_id is neither defined nor imported in the
# cells visible here — on a fresh kernel this raises NameError unless it is
# brought into scope first. The meaning of filter/threshold is defined by that
# helper; confirm against its source.
final_scores = merge_scores_by_job_id(mandatory_background_scores, 
                                      preferred_background_scores, 
                                      mandatory_education_scores,
                                      preferred_education_scores,
                                      mandatory_skill_scores,
                                      preferred_skill_scores,
                                      mandatory_credentials_scores,
                                      preferred_credentials_scores,
                                      responsibilities_match_scores,
                                      filter = True,
                                      threshold= 0.5
                                      )  



print(final_scores)

{'a713d178-a7dc-4f03-b569-9c7deb76bff3': {'mandatory_background_score': None, 'mandatory_industry_score': None, 'preferred_background_score': None, 'preferred_industry_score': None, 'preferred_education_score': None, 'mandatory_skill_score': 0.8692123757468332, 'preferred_skill_score': None, 'mandatory_credentials_score': None, 'preferred_credentials_score': None, 'responsibilities_score': 0.7857423718013462}, '428eda01-4bb2-44c7-9281-9a5c43f92e3a': {'mandatory_background_score': 1.0, 'mandatory_industry_score': None, 'preferred_background_score': None, 'preferred_industry_score': None, 'mandatory_education_score': 1.0, 'preferred_education_score': None, 'mandatory_skill_score': 0.6948758999506633, 'preferred_skill_score': 0.7472760677337649, 'mandatory_credentials_score': None, 'preferred_credentials_score': None, 'responsibilities_score': 0.691143016020457}, '63af5a5c-5d46-4c8d-b778-ffa195ab4fac': {'mandatory_background_score': None, 'mandatory_industry_score': None, 'preferred_backg

----

#### Overall Scores

In [23]:
def make_overall_scores(
    job_scores_dict,
    # top-level weights (must sum to 1):
    mandatory_weight=0.5,
    preferred_weight=0.5,
    # subcategory weights (must sum to 1):
    skills_weight=0.2,
    education_weight=0.2,
    background_weight=0.2,
    credentials_weight=0.2,
    responsibilities_weight=0.2
):
    """
    For each job_id in `job_scores_dict`, compute:
      overall_mandatory,
      overall_preferred,
      overall_score,
      overall_skills, overall_education, overall_background, overall_credentials

    Then return a list of tuples (job_id, final_score_dict), sorted by "overall_score"
    descending (a None overall_score sorts as 0).

    Weighted-sum logic:
      - If a sub-score is None (or its key is absent), skip it (do not treat as 0).
      - If all sub-scores for "mandatory" are None => overall_mandatory is None; similarly for "preferred".
      - "responsibilities_score" counts towards the mandatory group only.
      - overall_score is a weighted average of overall_mandatory & overall_preferred, ignoring None.
        If the weights of the available parts add up to 0, overall_score is None.
      - overall_<category> is the plain mean of the mandatory and preferred
        score of that category, ignoring None.

    Parameters:
      job_scores_dict (dict): job_id -> dict of sub-scores
        ("mandatory_skill_score", "preferred_skill_score", ...). Each input
        dict is copied, not modified.
      mandatory_weight, preferred_weight: top-level weights.
      skills_weight, education_weight, background_weight, credentials_weight,
      responsibilities_weight: subcategory weights.

    Raises:
      ValueError: if mandatory_weight + preferred_weight != 1, or if the five
        subcategory weights do not sum to 1 (tolerance 1e-9).
    """
    # Check top-level weights
    eps = 1e-9
    if abs((mandatory_weight + preferred_weight) - 1.0) > eps:
        raise ValueError("Error: mandatory_weight + preferred_weight must equal 1.")
    # Check subcategory weights
    subcat_sum = skills_weight + education_weight + background_weight + credentials_weight + responsibilities_weight
    if abs(subcat_sum - 1.0) > eps:
        raise ValueError("Error: subcategory weights must sum to 1.")

    def safe_avg(vals):
        """Average ignoring None; None when nothing is left."""
        present = [x for x in vals if x is not None]
        return sum(present) / len(present) if present else None

    def weighted_avg(subscores: dict, weights: dict):
        """
        Weighted average over the entries of `subscores` that are not None,
        renormalised by the weights of those entries.
        Returns None when no entry is available or the available weights sum to 0.
        """
        numerator = 0.0
        denom = 0.0
        for cat, val in subscores.items():
            if val is not None:
                w = weights.get(cat, 0.0)
                numerator += val * w
                denom += w
        if denom == 0.0:
            return None
        return numerator / denom

    # One weight table serves both groups: weighted_avg only looks up the
    # categories present in the sub-score dict it is given.
    category_weights = {
        "skill": skills_weight,
        "education": education_weight,
        "background": background_weight,
        "credentials": credentials_weight,
        "responsibilities": responsibilities_weight
    }
    top_level_weights = {
        "mandatory": mandatory_weight,
        "preferred": preferred_weight
    }

    results = {}

    for job_id, data in job_scores_dict.items():
        mandatory_subscores = {
            "skill": data.get("mandatory_skill_score"),
            "education": data.get("mandatory_education_score"),
            "background": data.get("mandatory_background_score"),
            "credentials": data.get("mandatory_credentials_score"),
            "responsibilities": data.get("responsibilities_score")  # single responsibilities
        }
        # No responsibilities for preferred
        preferred_subscores = {
            "skill": data.get("preferred_skill_score"),
            "education": data.get("preferred_education_score"),
            "background": data.get("preferred_background_score"),
            "credentials": data.get("preferred_credentials_score")
        }

        overall_mandatory = weighted_avg(mandatory_subscores, category_weights)
        overall_preferred = weighted_avg(preferred_subscores, category_weights)

        # Same renormalising average at the top level. Going through
        # weighted_avg also covers the case where the only available part has
        # weight 0, which previously divided by zero.
        final_score = weighted_avg(
            {"mandatory": overall_mandatory, "preferred": overall_preferred},
            top_level_weights
        )

        # Build final record for this job
        new_data = dict(data)  # copy original fields
        new_data.update({
            "overall_mandatory": overall_mandatory,
            "overall_preferred": overall_preferred,
            "overall_score": final_score,
            # Category-level averages ignoring None
            "overall_skills": safe_avg(
                [mandatory_subscores["skill"], preferred_subscores["skill"]]),
            "overall_education": safe_avg(
                [mandatory_subscores["education"], preferred_subscores["education"]]),
            "overall_background": safe_avg(
                [mandatory_subscores["background"], preferred_subscores["background"]]),
            "overall_credentials": safe_avg(
                [mandatory_subscores["credentials"], preferred_subscores["credentials"]])
        })
        results[job_id] = new_data

    # Build a list of (job_id, final_scores), sorted descending by "overall_score".
    # If "overall_score" is None, treat it as 0 for sorting.
    sorted_list = sorted(
        results.items(),
        key=lambda item: item[1].get("overall_score") or 0.0,
        reverse=True
    )

    return sorted_list

In [24]:
# Rank the merged scores with the default weights; yields (job_id, scores) tuples, best first.
results = make_overall_scores(final_scores)

In [25]:
# Display the ranked (job_id, scores) list.
results

[('a66264a5-a5f5-465b-8798-09a42dd71bb0',
  {'mandatory_background_score': None,
   'mandatory_industry_score': None,
   'preferred_background_score': None,
   'preferred_industry_score': None,
   'mandatory_education_score': None,
   'preferred_education_score': None,
   'mandatory_skill_score': 0.7624501959950315,
   'preferred_skill_score': 1.0,
   'mandatory_credentials_score': None,
   'preferred_credentials_score': None,
   'responsibilities_score': 0.7453516225020093,
   'overall_mandatory': 0.7539009092485205,
   'overall_preferred': 1.0,
   'overall_score': 0.8769504546242602,
   'overall_skills': 0.8812250979975158,
   'overall_education': None,
   'overall_background': None,
   'overall_credentials': None}),
 ('a713d178-a7dc-4f03-b569-9c7deb76bff3',
  {'mandatory_background_score': None,
   'mandatory_industry_score': None,
   'preferred_background_score': None,
   'preferred_industry_score': None,
   'preferred_education_score': None,
   'mandatory_skill_score': 0.869212375

----

#### Eval

In [32]:
# Job ids that made it into the ranked results, in ranked order.
matched_jobids = [job_id for job_id, _ in results]

# Set copy for constant-time membership tests while scanning the descriptions.
_matched_jobid_set = set(matched_jobids)

# Keep the parsed job descriptions whose 'job_id' is among the ranked results.
# Descriptions that failed to parse are stored as None upstream and are skipped.
filtered_jobs = [
    j for j in job_desc_json
    if j is not None and j.get("job_id") in _matched_jobid_set
]

In [33]:
# Display the job descriptions that matched.
filtered_jobs

[{'details': {'wage': [],
   'benefits': {'fsa': False,
    'hsa': False,
    'bonus': False,
    'other': [],
    'dental': False,
    'equity': False,
    'vision': False,
    'medical': False,
    '401k_match': False,
    'mental_health': False,
    'unlimited_pto': False,
    'tuition_reimbursement': False},
   'location': [{'city': 'Chicago', 'state': 'IL', 'country': 'US'}],
   'job_title': ['.NET Developer - Quality Management Software (QMS)'],
   'tax_terms': ['Direct-hire'],
   'wfh_policy': [],
   'company_name': ['QT9 Software'],
   'company_stage': [],
   'work_schedule': [],
   'job_title_base': ['.NET Developer'],
   'employment_type': ['Full-time'],
   'travel_required': {'required': False, 'hours_weekly': 0},
   'company_industry': ['SaaS software provider for Quality Management and ERP software solutions'],
   'experience_level': ['Associate'],
   'work_authorization': []},
  'mandatory': {'education': [{'field_of_study': ['Computer Science',
      'Engineering',
     

In [31]:
# Display the parsed resume at index 7 (the one scored in the cells above).
resume_json[7]

{'skills': [{'skill': [['Developed REST APIs', 'Spring Web Flux Framework'],
    ['Developed REST APIs', 'core Java APIs']],
   'years': 3.67},
  {'skill': [['Developed GraphQL service',
     'Spring Boot Spring Web Flux Reactor'],
    ['Configured GraphQL service', 'Spring Boot Spring Web Flux Reactor'],
    ['Maintained GraphQL service', 'Spring Boot Spring Web Flux Reactor']],
   'years': 3.67},
  {'skill': [['Development of code', 'JSP code']], 'years': 3.67},
  {'skill': [['Development of code', 'HTML']], 'years': 3.67},
  {'skill': [['Development of code', 'CSS']], 'years': 3.67},
  {'skill': [['JavaScript libraries', 'Vue.js']], 'years': 3.67},
  {'skill': [['JavaScript libraries', 'jQuery']], 'years': 3.67},
  {'skill': [['JavaScript libraries', 'Prototype']], 'years': 3.67},
  {'skill': [['Designed web application', 'AngularJS'],
    ['Developed web application', 'AngularJS']],
   'years': 3.67},
  {'skill': [['Implemented inter-service communication', 'RESTful APIs'],
    ['I

In [30]:
# Loop through all backgrounds in job_desc_json and print 
# Print every mandatory and preferred professional-background requirement, per job.
for job in job_desc_json:
    # Descriptions that failed to parse are stored as None upstream; skip them.
    if job is None:
        continue

    job_id = job.get("job_id")
    print(f"Job ID: {job_id}")

    # `or` fallbacks also cover sections that are present but null.
    # 1) Mandatory professional backgrounds
    mandatory_bgs = (job.get("mandatory") or {}).get("professional_background") or []
    for i, bg_req in enumerate(mandatory_bgs, start=1):
        print(f"  Mandatory Background #{i}: {bg_req}")

    # 2) Preferred professional backgrounds
    preferred_bgs = (job.get("preferred") or {}).get("professional_background") or []
    for i, bg_req in enumerate(preferred_bgs, start=1):
        print(f"  Preferred Background #{i}: {bg_req}")

Job ID: cc807c45-4a2f-4335-90d5-b50669459cc5
  Mandatory Background #1: {'industry': [], 'minyears': [8], 'background': [['Oracle Cloud ERP']]}
Job ID: 1400a854-77d4-4aab-a84c-9a643f100657
  Mandatory Background #1: {'industry': [], 'minyears': [8], 'background': [['relevant engineering hands-on work']]}
Job ID: 72f7a93e-824e-485f-b356-58c523fe65d0
  Mandatory Background #1: {'industry': ['financial services'], 'minyears': [5], 'background': [['Marketing Analytics'], ['similar practical experience']]}
  Preferred Background #1: {'industry': ['financial services', 'mortgage', 'credit card', 'personal loans', 'business loans'], 'minyears': [0], 'background': [['Marketing Analytics'], ['similar practical experience']]}
Job ID: ba72461b-56f5-4c1b-90b0-3c3b9ffa8a1c
  Mandatory Background #1: {'industry': [], 'minyears': [0], 'background': [["Bachelor's degree", 'computer science'], ['Software Engineering']]}
  Mandatory Background #2: {'industry': [], 'minyears': [0], 'background': [['3rd p