In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/ner-training-dataset/traindata.json
/kaggle/input/ner-training-dataset/testdata.json
/kaggle/input/job-recom-dataset/datascientist.json
/kaggle/input/job-recom-dataset/dataengineer.json
/kaggle/input/job-recom-dataset/phpdeveloper.json
/kaggle/input/job-recom-dataset/javadeveloper.json
/kaggle/input/job-recom-dataset/backenddeveloper.json


In [2]:
import spacy
import json
import random
import logging
from spacy.training import Example
import re
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from spacy.training.iob_utils import biluo_tags_from_offsets

In [3]:
# --- CONFIGURATION ---
TEST_DATA_PATH = "/kaggle/input/ner-training-dataset/testdata.json"
TRAIN_DATA_PATH = "/kaggle/input/ner-training-dataset/traindata.json"
OUTPUT_DIR = "/kaggle/working/nlp_ner_model"
ITERATIONS = 15

In [4]:



# --- 1. CONVERSION FUNCTION ---
def convert_dataturks_to_spacy(dataturks_JSON_FilePath):
    training_data = []
    lines = []
    try:
        with open(dataturks_JSON_FilePath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        print(f"Error reading file: {e}")
        return []

    for line in lines:
        data = json.loads(line)
        text = data['content']
        entities = []
        if data['annotation'] is not None:
            for annotation in data['annotation']:
                point = annotation['points'][0]
                labels = annotation['label']
                if not isinstance(labels, list):
                    labels = [labels]
                for label in labels:
                    entities.append((point['start'], point['end'] + 1, label))

        training_data.append((text, {"entities": entities}))
    return training_data

# --- 2. AUTHOR'S CLEANING (Fixes Whitespace) ---
def trim_entity_spans(data: list):
    """Removes leading and trailing white spaces from entity spans."""
    invalid_span_tokens = re.compile(r'\s')

    cleaned_data = []
    for text, annotations in data:
        entities = annotations['entities']
        valid_entities = []
        for start, end, label in entities:
            valid_start = start
            valid_end = end
            while valid_start < len(text) and invalid_span_tokens.match(text[valid_start]): valid_start += 1
            while valid_end > 1 and invalid_span_tokens.match(text[valid_end-1]): valid_end -= 1
            valid_entities.append((valid_start, valid_end, label))
        cleaned_data.append((text, {'entities': valid_entities}))

    return cleaned_data

# --- 3. CRITICAL FIX: REMOVE OVERLAPS (Fixes Crash) ---
def remove_overlaps(data: list):
    """
    Resolves conflicting entities by keeping the longest one.
    Crucial for SpaCy v3 to prevent ValueError: [E103].
    """
    cleaned_data = []
    for text, annotations in data:
        entities = annotations['entities']
        # Sort by start index (asc) and length (desc)
        sorted_entities = sorted(entities, key=lambda x: (x[0], -(x[1] - x[0])))
        
        non_overlapping = []
        last_end = -1
        
        for start, end, label in sorted_entities:
            # If the new entity starts AFTER the previous one ended, it's valid.
            if start >= last_end:
                non_overlapping.append((start, end, label))
                last_end = end
        
        cleaned_data.append((text, {'entities': non_overlapping}))
    return cleaned_data

# --- 4. PREPARE PIPELINE ---
print("Loading data...")
raw_data = convert_dataturks_to_spacy(TRAIN_DATA_PATH)

print("Cleaning whitespace (Author's method)...")
trimmed_data = trim_entity_spans(raw_data)

print("Removing overlaps (Fix for Error E103)...")
TRAIN_DATA = remove_overlaps(trimmed_data)

print(f"Data ready: {len(TRAIN_DATA)} resumes.")

# --- 5. TRAIN SPACY V3 ---
def train_spacy_v3(data, iterations):
    nlp = spacy.blank("en")
    if "ner" not in nlp.pipe_names:
        ner = nlp.add_pipe("ner", last=True)
    
    # Add labels
    for _, annotations in data:
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])
            
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
    
    with nlp.disable_pipes(*other_pipes):
        optimizer = nlp.begin_training()
        print(f"Starting training for {iterations} iterations...")
        
        for itn in range(iterations):
            random.shuffle(data)
            losses = {}
            for text, annotations in data:
                doc = nlp.make_doc(text)
                try:
                    example = Example.from_dict(doc, annotations)
                    nlp.update([example], drop=0.2, sgd=optimizer, losses=losses)
                except Exception as e:
                    pass # Skip any remaining bad data
            print(f"Iteration {itn+1}/{iterations} - Loss: {losses.get('ner', 0.0):.2f}")
            
    return nlp

# --- EXECUTE ---
nlp_model = train_spacy_v3(TRAIN_DATA, ITERATIONS)
nlp_model.to_disk(OUTPUT_DIR)
print(f"Model saved successfully to {OUTPUT_DIR}")


Loading data...
Cleaning whitespace (Author's method)...
Removing overlaps (Fix for Error E103)...
Data ready: 200 resumes.
Starting training for 15 iterations...


  d_xhat = N * dY - sum_dy - dist * var ** (-1.0) * sum_dy_dist
Test Engineer

Mangalore, Karnataka - E..." with entities "[(0, 10, 'Name'), (11, 24, 'Designation'), (26, 35...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
java developer

Pune, Maharashtra..." with entities "[(0, 16, 'Name'), (17, 31, 'Designation'), (33, 37...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
Senior Technical Lead - HCL Cisco

-..." with entities "[(0, 13, 'Name'), (14, 35, 'Designation'), (38, 41...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
Program Manager (Software Delivery..." with entities "[(0, 15, 'Name'), (16, 51, 'Designation'), (54, 67...". Use `spacy.training.of

Iteration 1/15 - Loss: 16267.56
Iteration 2/15 - Loss: 5303.87
Iteration 3/15 - Loss: 4045.41
Iteration 4/15 - Loss: 3720.80
Iteration 5/15 - Loss: 3208.78
Iteration 6/15 - Loss: 3157.26
Iteration 7/15 - Loss: 2791.22
Iteration 8/15 - Loss: 2860.45
Iteration 9/15 - Loss: 2640.53
Iteration 10/15 - Loss: 2469.24
Iteration 11/15 - Loss: 2346.00
Iteration 12/15 - Loss: 2285.62
Iteration 13/15 - Loss: 2049.13
Iteration 14/15 - Loss: 2036.91
Iteration 15/15 - Loss: 2080.89
Model saved successfully to /kaggle/working/nlp_ner_model


In [5]:
# --- 3. LOAD MODEL & DATA ---
output_dir = "/kaggle/working/nlp_ner_model"
print(f"Loading model from {output_dir}...")
nlp = spacy.load(output_dir)

print(f"Loading TEST data from {TEST_DATA_PATH}...")
test_data = convert_dataturks_to_spacy(TEST_DATA_PATH)
test_data = trim_entity_spans(test_data)
test_data = remove_overlaps(test_data) 


print(f"Testing on {len(test_data)} resumes...")

# --- 4. VALIDATION LOOP ---
stats = {} 
file_count = 0

for text, annot in test_data:
    # A. Generate Prediction
    doc = nlp(text)
    
    # B. Write to Resume Text File (Replicating Author's Output)
    filename = f"resume{file_count}.txt"
    with open(filename, "w", encoding="utf-8") as f:
        # Extract entities into a dict
        d = {}
        for ent in doc.ents:
            d.setdefault(ent.label_, []).append(ent.text)
        
        # Print Skills to console (like author)
        if 'Skills' in d:
            print(f"resume {file_count} skills {d['Skills']}")
        
        # Write to file
        for label, items in d.items():
            f.write(f"\n\n{label}:\n")
            for item in set(items):
                f.write(f"{item.replace(chr(10), '')}\n") 
    
    # C. Calculate Metrics 
    # Only calculate if the test data actually HAS annotations (Gold Standard)
    if annot['entities']:
        predicted_labels = set(ent.label_ for ent in doc.ents)
        
        # Create Reference (Gold) Doc object
        doc_gold = nlp.make_doc(text)
        
        # Get Gold Tags
        gold_tags = biluo_tags_from_offsets(doc_gold, annot['entities'])
        
        # Get Predicted Tags
        pred_tags = [token.ent_iob_ + "-" + token.ent_type_ if token.ent_type_ else "O" for token in doc]

        # Initialize stats for labels if not exists
        for label in predicted_labels:
            if label not in stats:
                stats[label] = [0, 0.0, 0.0, 0.0, 0.0, 0] 

            y_true = []
            y_pred = []
            
            min_len = min(len(gold_tags), len(pred_tags))
            
            for i in range(min_len):
                # Check Gold
                g_tag = gold_tags[i]
                is_gold_label = (label in g_tag) 
                
                # Check Pred
                p_tag = pred_tags[i]
                is_pred_label = (label in p_tag)
                
                y_true.append(label if is_gold_label else "Not " + label)
                y_pred.append(label if is_pred_label else "Not " + label)

            # Calculate Scores
            p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted', labels=[label, "Not "+label], zero_division=0)
            a = accuracy_score(y_true, y_pred)
            
            # Update Stats
            stats[label][0] = 1 
            stats[label][1] += p
            stats[label][2] += r
            stats[label][3] += f
            stats[label][4] += a
            stats[label][5] += 1
        
    file_count += 1

# --- 5. PRINT FINAL RESULTS ---
print("\n" + "="*40)
print("       FINAL VALIDATION RESULTS       ")
print("="*40)

if not stats:
    print("No metrics calculated. (Does the test data have annotations?)")
else:
    for label, data in stats.items():
        if data[5] > 0: 
            count = data[5]
            print(f"\nFor Entity: {label}")
            print(f"Accuracy  : {(data[4] / count) * 100:.2f}%")
            print(f"Precision : {data[1] / count:.4f}")
            print(f"Recall    : {data[2] / count:.4f}")
            print(f"F-score   : {data[3] / count:.4f}")

Loading model from /kaggle/working/nlp_ner_model...
Loading TEST data from /kaggle/input/ner-training-dataset/testdata.json...
Testing on 20 resumes...
resume 0 skills ['C (Less than 1 year), Database (Less than 1 year), Database Management (Less than 1 year),\nDatabase Management System (Less than 1 year), Java (Less than 1 year)']
resume 1 skills ['Database (Less than 1 year), HTML (Less than 1 year), Linux. (Less than 1 year), MICROSOFT\nACCESS (Less than 1 year), MICROSOFT WINDOWS (Less than 1 year)']
resume 2 skills ['servicenow (1 year), Mainframe (3 years), cobol (3 years), Jcl (3 years), Teradata (3 years)']


Active member of IIIT Committee in ..." with entities "[(0, 14, 'Name'), (62, 68, 'Location'), (104, 148,...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.


resume 3 skills ['Database (3 years), SQL (3 years), Sql Dba']
resume 4 skills ['SEARCH ENGINE MARKETING (2 years), SEM (2 years), ACCESS (Less than 1 year), AJAX (Less\nthan 1 year), APACHE (Less than 1 year)']


Operational Analyst (SQL DBA) Enginee..." with entities "[(0, 12, 'Name'), (13, 51, 'Designation'), (54, 60...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
lecturer - oracle tutorials

Mumbai,..." with entities "[(0, 13, 'Name'), (14, 22, 'Designation'), (25, 41...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.


resume 5 skills ['JAVA (1 year), C++ (Less than 1 year), Hadoop (Less than 1 year), HADOOP (Less than 1 year),\nCSS (Less than 1 year)']
resume 6 skills ['PMP trained six sigma yellow belt']


Automation developer

- Email me on In..." with entities "[(0, 11, 'Name'), (12, 32, 'Designation'), (56, 97...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
Senior Program coordinator - oracle India..." with entities "[(0, 8, 'Name'), (9, 35, 'Designation'), (38, 58, ...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.


resume 7 skills ['ANSYS (Less than 1 year), CATIA (Less than 1 year), CREO (Less than 1 year), PARAMETRIC\n(Less than 1 year), PYTHON (Less than 1 year), Selenium, Selenium Webdriver, Testing,\nFunctional Testing, Automation Testing, Regression Testing, Quality Assurance']
resume 9 skills ['database (Less than 1 year), Git (Less than 1 year), Java (Less than 1 year), JIRA (Less than 1\nyear), life cycle (Less than 1 year)']


Team Lead - Microsoft

- Email me on I..." with entities "[(0, 11, 'Name'), (12, 21, 'Designation'), (24, 33...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.


resume 10 skills ['Invoice (5 years), posting. (5 years), TRAINING (4 years)']
resume 11 skills ['Requirement Analysis (Less than 1 year), Sales support (Less than 1 year), Test Planning (Less\nthan 1 year)']
resume 13 skills ['CSS (Less than 1 year), DHCP (Less than 1 year), HSRP. (Less than 1 year), routing protocols.\n(Less than 1 year), Voip (2 years)']


Senior Analyst - Cisco

New Delh..." with entities "[(0, 17, 'Name'), (42, 51, 'Location'), (81, 128, ...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.


resume 14 skills ['Teaching (3 years)']
resume 15 skills ['Excel (Less than 1 year), MS Excel (Less than 1 year), Tally (Less than 1 year)']
resume 16 skills ['Expertise on Manual testing of the applications\n• Solid experience on Selenium using Cucumber automation\n• Solid experience of AGILE Methodologies, AGILE Manifesto and Scrum processes\n• Solid experience of Defect/Test Management tools like JIRA, QualityCenter.\n• Proven skills in SQL, RDBMS and UNIX variant like LINUX\n• Expertise in writing SQL Queries, SQL Scripts and performing Database testing\n• Proven skills in Leading and training teams, Test Management and Meeting the deadlines\n• Strong communication & interaction with Clients, Developers, Business Analysts, Management\n• Strong experience of E', 'TESTING (9 years), JAVA (3 years), JUNIT (2 years), ORACLE (2 years), API (1 year)']


Technology Analyst - Infosys Limit..." with entities "[(0, 15, 'Name'), (16, 34, 'Designation'), (37, 52...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.


resume 17 skills ['AWS Devops']
resume 18 skills ['ACCESS (Less than 1 year), BUYING (Less than 1 year), BUYING/PROCUREMENT (Less than 1\nyear), CSS (Less than 1 year),', 'DATABASE']


Deployed chef for configuration manag..." with entities "[(0, 12, 'Name'), (13, 70, 'Designation'), (73, 78...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.


resume 19 skills ['problem solving (Less than 1 year), project lifecycle (Less than 1 year), project manager (Less\nthan 1 year), technical assistance. (Less than 1 year)']

       FINAL VALIDATION RESULTS       

For Entity: Designation
Accuracy  : 99.19%
Precision : 0.9945
Recall    : 0.9919
F-score   : 0.9919

For Entity: Name
Accuracy  : 98.75%
Precision : 0.9893
Recall    : 0.9875
F-score   : 0.9863

For Entity: College Name
Accuracy  : 98.75%
Precision : 0.9908
Recall    : 0.9875
F-score   : 0.9866

For Entity: Skills
Accuracy  : 90.92%
Precision : 0.9342
Recall    : 0.9092
F-score   : 0.9032

For Entity: Companies worked at
Accuracy  : 99.63%
Precision : 0.9976
Recall    : 0.9963
F-score   : 0.9968

For Entity: Location
Accuracy  : 99.67%
Precision : 0.9967
Recall    : 0.9967
F-score   : 0.9960

For Entity: Degree
Accuracy  : 99.31%
Precision : 0.9948
Recall    : 0.9931
F-score   : 0.9931

For Entity: Email Address
Accuracy  : 99.86%
Precision : 0.9997
Recall    : 0.9986
F-score

In [6]:
model_path = "/kaggle/working/nlp_ner_model"
job_dir = "/kaggle/input/job-recom-dataset/"
test_data_path = "/kaggle/input/ner-training-dataset/testdata.json"

print(f"Loading model from {model_path}...")
nlp = spacy.load(model_path)

Loading model from /kaggle/working/nlp_ner_model...


In [7]:
# --- 2. HARDCODED KNOWLEDGE BASE (From your code) ---
# This ensures we don't miss core skills for specific roles, exactly as in your snippet.
hardcoded_skills_map = {
    "phpdeveloper.json": ['PHP', 'Laravel', 'CodeIgniter', 'Symfony', 'Zend', 'Phalcon', 'CakePHP', 'Yii', 'FuelPHP', 'React', 'Vue', 'Angular', 'Ember', 'Backbone'],
    "javadeveloper.json": ['IHS', 'WAS', 'Java EE', 'SQL Server', '.NET core', 'C#', 'ASP.NET', 'Rdlc', 'Linq', 'Sql', 'Web Api', 'Mvc', 'Javascript', 'Web Services', 'Oracle', 'MS SQL'],
    "datascientist.json": ['Data Science', 'Python', 'Machine Learning', 'SAS', 'Java', 'Scala', 'Hadoop', 'Hive', 'Bigdata', 'Programming', 'SQL server reporting', 'Msbi Ssis', 'Ssrs', 'Msbi', 'Sql Reporting', 'Artificial Intelligence', 'Pandas', 'Pyspark', 'Sklearn', 'Flask', 'Django', 'Map Reduce', 'Parametric Design', 'Modeling', 'Regression', 'Patterns', 'Data Mining', 'Text Mining', 'Oops', 'Deep Learning', 'Web Analytics', 'Time Series', 'Regression', 'Tensorflow', 'Azure', 'Linear Regression', 'Logistic Regression', 'Decision Tree', 'Random Forest', 'Data Structure', 'Computer Vision'],
    "backenddeveloper.json": ['MySQL', 'PostgreSQL', 'Microsoft Access', 'SQL Server', 'FileMaker', 'Oracle', 'RDBMS', 'dBASE', 'Clipper', 'FoxPro', 'Firebase', 'Mongodb'],
    "dataengineer.json": ['java', 'J2EE', 'Oracle Fusion','Oracle Cloud', 'Salesforce','Devops Android', 'Business Analyst', 'UI Developer', 'DBAs','Embedded Systems', '.NET', 'Hadoop', 'SQL Developer', 'Big Data','Tableau', 'Networking', 'Etl', 'Informatica', 'Ios', 'Quality Analyst','Project Manager', 'Python']
}

# --- 3. HELPER FUNCTIONS ---

def find_skills(text, nlp_model):
    """Extracts skills using the trained NER model."""
    doc = nlp_model(text)
    skills = []
    for ent in doc.ents:
        if ent.label_ == "Skills":
            skills.append(ent.text)
    return skills

def process_jobs(job_folder, nlp_model):
    """Reads job JSONs, extracts NER skills, and adds Hardcoded skills."""
    processed_jobs = []
    json_files = glob.glob(os.path.join(job_folder, "*.json"))
    
    for file_path in json_files:
        file_name = os.path.basename(file_path)
        
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
            
            # Combine all descriptions in the file into one text blob
            full_text = ""
            if isinstance(data, list):
                for job in data:
                    desc = job.get('jobdescription') or job.get('description', '')
                    full_text += ' ' + desc
            elif isinstance(data, dict):
                 full_text = data.get('jobdescription', '')

            # 1. Get Model Skills
            extracted_skills = find_skills(full_text, nlp_model)
            
            # 2. Add Hardcoded Skills (if matches filename)
            if file_name in hardcoded_skills_map:
                extracted_skills.extend(hardcoded_skills_map[file_name])
            
            # 3. Remove Duplicates
            final_skills = list(set(extracted_skills))
            
            processed_jobs.append({
                'name': file_name,
                'skills': final_skills
            })
            print(f"Processed Job: {file_name} | Total Skills: {len(final_skills)}")
            
    return processed_jobs

def process_resumes(test_file_path, nlp_model):
    """Reads the testdata.json and processes each line as a resume."""
    processed_cvs = []
    with open(test_file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        
    for i, line in enumerate(lines):
        data = json.loads(line)
        text = data['content']
        skills = find_skills(text, nlp_model)
        
        # Create a dummy filename like 'resume_0.txt'
        processed_cvs.append({
            'name': f"resume_{i}.txt",
            'skills': list(set(skills)) # Unique skills only
        })
    print(f"Processed {len(processed_cvs)} Resumes.")
    return processed_cvs

def calculate_match(source_skills, target_skills):
    """Calculates overlap percentage."""
    if not target_skills: 
        return 0.0
    
    match_count = 0
    for skill in source_skills:
        # Case insensitive matching
        if any(skill.lower() == target_skill.lower() for target_skill in target_skills):
            match_count += 1
            
    return (match_count / len(target_skills)) * 100

# --- 4. EXECUTION ---

# A. Create Lists
print("\n--- Creating Job List ---")
jobs_list = process_jobs(job_dir, nlp)

print("\n--- Creating CV List ---")
cvs_list = process_resumes(test_data_path, nlp)

# B. SCENARIO 1: Match Resumes to Jobs (Find best job for a person)
print("\n" + "="*50)
print(" SCENARIO 1: RECOMMENDING JOBS FOR RESUMES ")
print("="*50)

# Let's take the first 5 resumes to keep output clean
for cv in cvs_list[:5]: 
    print(f"\nFinding jobs for: {cv['name']}")
    matches = []
    for job in jobs_list:
        # We calculate how many of the JOB'S required skills the CV has
        pct = calculate_match(cv['skills'], job['skills'])
        matches.append({'job_name': job['name'], 'pct': pct})
    
    # Sort descending
    matches = sorted(matches, key=lambda x: x['pct'], reverse=True)
    
    for m in matches:
        print(f"  -> Match with {m['job_name']}: {m['pct']:.2f}%")

# C. SCENARIO 2: Match Jobs to Resumes (Find best candidate for a job)
print("\n" + "="*50)
print(" SCENARIO 2: FINDING CANDIDATES FOR JOBS ")
print("="*50)

for job in jobs_list:
    print(f"\nFinding candidates for: {job['name']}")
    matches = []
    for cv in cvs_list:
        # We calculate how many of the JOB'S required skills the CV has
        pct = calculate_match(cv['skills'], job['skills'])
        matches.append({'cv_name': cv['name'], 'pct': pct})
        
    # Sort descending
    matches = sorted(matches, key=lambda x: x['pct'], reverse=True)
    
    # Show top 3 candidates
    for i in range(min(5, len(matches))):
        print(f"  {i+1}. {matches[i]['cv_name']} : {matches[i]['pct']:.2f}%")


--- Creating Job List ---


NameError: name 'glob' is not defined