In [1]:
import pandas as pd

# Load data
candidates = pd.read_csv('../data/mock_candidates.csv')
jobs = pd.read_csv('../data/mock_jobs.csv')

# Preview
candidates.head()


Unnamed: 0,candidate_id,skills,years_experience,location,mindset_score
0,C001,"python, sql, excel",3,Remote,80
1,C002,"javascript, react, python",5,"New York, NY",85
2,C003,"salesforce, sql, customer_service",2,"Boston, MA",75
3,C004,"excel, sql",1,Remote,70
4,C005,"react, node, javascript",4,Remote,88


In [5]:
# Match Function for Skills

def skill_match_score(candidate_skills, required_skills):
    candidate_set = set(candidate_skills.lower().split(", "))
    required_set = set(required_skills.lower().split(", "))
    if not required_set:
        return 0
    return len(candidate_set & required_set) / len(required_set)
# Gives a score like 1.0 (100% match), 0.5 (half match), etc.

In [6]:
# Match Function for Location

def location_match(candidate_location, job_location):
    # Remote jobs can match anyone, or candidate preferring Remote
    if job_location.lower() == "remote" or candidate_location.lower() == "remote":
        return 1
    return int(candidate_location.strip().lower() == job_location.strip().lower())

In [7]:
# Match Function for Experience

def experience_score(candidate_years, job_min_years):
    return min(1, candidate_years / job_min_years) if job_min_years > 0 else 1


In [8]:
# Total Match Score Calculator - Combine All

def total_match_score(candidate, job):
    skills = skill_match_score(candidate["skills"], job["required_skills"])
    location = location_match(candidate["location"], job["location"])
    experience = experience_score(candidate["years_experience"], job["min_years_experience"])
    
    mindset = candidate["mindset_score"] / 100  # Normalize 0–1
    mindset_weight = job.get("weight_mindset", 0.15)
    
    # Base weights
    skill_weight = 0.5
    exp_weight = 0.2
    loc_weight = 0.15

    # Final score
    score = (
        skill_weight * skills +
        exp_weight * experience +
        loc_weight * location +
        mindset_weight * mindset
    )
    
    return round(score, 3)

In [9]:
# Match All Candidates to a Job 

# Pick one job for ex. the first one

job = jobs.iloc[0]  # Change index to test other jobs

# Match each candidate
match_results = candidates.copy()
match_results["match_score"] = candidates.apply(lambda row: total_match_score(row, job), axis=1)

# Sort by score
match_results = match_results.sort_values(by="match_score", ascending=False)

match_results[["candidate_id", "match_score", "skills", "location"]]


Unnamed: 0,candidate_id,match_score,skills,location
0,C001,1.01,"python, sql, excel",Remote
1,C002,0.77,"javascript, react, python","New York, NY"
2,C003,0.75,"salesforce, sql, customer_service","Boston, MA"
3,C004,0.64,"excel, sql",Remote
4,C005,0.526,"react, node, javascript",Remote


In [10]:
# Create a New List to Store Matches
all_matches = []

In [11]:
# Loop Through Every Candidate and Every Job

for _, candidate in candidates.iterrows():
    for _, job in jobs.iterrows():
        score = total_match_score(candidate, job)
        all_matches.append({
            "candidate_id": candidate["candidate_id"],
            "job_id": job["job_id"],
            "match_score": score,
            "candidate_skills": candidate["skills"],
            "job_skills": job["required_skills"],
            "candidate_location": candidate["location"],
            "job_location": job["location"],
            "candidate_mindset_score": candidate["mindset_score"]
        })


In [12]:
# Convert into DataFrame
matches_df = pd.DataFrame(all_matches)

# Each row = (one candidate vs. one job) with match score and details

In [13]:
# Sort the Matches by Score with Top Matches First

matches_df = matches_df.sort_values(by="match_score", ascending=False)

# Preview the top matches
matches_df.head(10)

Unnamed: 0,candidate_id,job_id,match_score,candidate_skills,job_skills,candidate_location,job_location,candidate_mindset_score
0,C001,J101,1.01,"python, sql, excel","python, sql",Remote,Remote,80
14,C005,J103,0.982,"react, node, javascript","javascript, react",Remote,Remote,88
5,C002,J103,0.978,"javascript, react, python","javascript, react","New York, NY",Remote,85
7,C003,J102,0.925,"salesforce, sql, customer_service","salesforce, customer_service","Boston, MA","Boston, MA",75
3,C002,J101,0.77,"javascript, react, python","python, sql","New York, NY",Remote,85
6,C003,J101,0.75,"salesforce, sql, customer_service","python, sql","Boston, MA",Remote,75
9,C004,J101,0.64,"excel, sql","python, sql",Remote,Remote,70
12,C005,J101,0.526,"react, node, javascript","python, sql",Remote,Remote,88
2,C001,J103,0.47,"python, sql, excel","javascript, react",Remote,Remote,80
13,C005,J102,0.438,"react, node, javascript","salesforce, customer_service",Remote,"Boston, MA",88


In [15]:
# Full Matching Code

all_matches = []

for _, candidate in candidates.iterrows():
    for _, job in jobs.iterrows():
        score = total_match_score(candidate, job)
        all_matches.append({
            "candidate_id": candidate["candidate_id"],
            "job_id": job["job_id"],
            "match_score": score,
            "candidate_skills": candidate["skills"],
            "job_skills": job["required_skills"],
            "candidate_location": candidate["location"],
            "job_location": job["location"],
            "candidate_mindset_score": candidate["mindset_score"]
        })

# Turn match list into a DataFrame
matches_df = pd.DataFrame(all_matches)
# Sort raw match score
matches_df = matches_df.sort_values(by="match_score", ascending=False)
# Format match_score clean
matches_df["match_score_pct"] = (matches_df["match_score"] * 100).round(1).astype(str) + '%'


# Display top 10 matches
matches_df[["candidate_id", "job_id", "match_score_pct", "candidate_skills", "job_skills", "candidate_location", "job_location"]].head(10)


Unnamed: 0,candidate_id,job_id,match_score_pct,candidate_skills,job_skills,candidate_location,job_location
0,C001,J101,101.0%,"python, sql, excel","python, sql",Remote,Remote
14,C005,J103,98.2%,"react, node, javascript","javascript, react",Remote,Remote
5,C002,J103,97.8%,"javascript, react, python","javascript, react","New York, NY",Remote
7,C003,J102,92.5%,"salesforce, sql, customer_service","salesforce, customer_service","Boston, MA","Boston, MA"
3,C002,J101,77.0%,"javascript, react, python","python, sql","New York, NY",Remote
6,C003,J101,75.0%,"salesforce, sql, customer_service","python, sql","Boston, MA",Remote
9,C004,J101,64.0%,"excel, sql","python, sql",Remote,Remote
12,C005,J101,52.6%,"react, node, javascript","python, sql",Remote,Remote
2,C001,J103,47.0%,"python, sql, excel","javascript, react",Remote,Remote
13,C005,J102,43.8%,"react, node, javascript","salesforce, customer_service",Remote,"Boston, MA"


In [16]:
# Save Match Table to CSV
matches_df.to_csv('../data/final_candidate_job_matches.csv', index=False)

In [17]:
# Filter for strong matches (>= 60%)
strong_matches_df = matches_df[matches_df["match_score"] >= 0.6]

# Save strong matches separately
strong_matches_df.to_csv('../data/strong_candidate_job_matches.csv', index=False)