# Job-candidate matching

In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util

  from .autonotebook import tqdm as notebook_tqdm





##### 1. Load datasets

In [2]:
def _require_columns(frame, required, source):
    """Raise KeyError if `frame` lacks any column named in `required`.

    `source` is only used to make the error message point at the right file.
    """
    missing = sorted(set(required) - set(frame.columns))
    if missing:
        raise KeyError(f"{source} is missing required column(s): {missing}")


cv_df = pd.read_csv("cv_records.csv")
job_df = pd.read_csv("job_records.csv")

# Fail fast, before the model is loaded and the texts are encoded, if a column
# that the later cells index by name is absent. "domain" is deliberately not
# required: the export cell treats it as optional.
_require_columns(
    cv_df,
    ["cv_id", "name", "gender", "ethnicity", "role", "description"],
    "cv_records.csv",
)
_require_columns(job_df, ["job_id", "role", "level", "text"], "job_records.csv")

##### 2. Load model and encode texts

In [3]:
# Identifier of the sentence-embedding checkpoint used for both CVs and jobs.
MODEL_NAME = "TechWolf/JobBERT-v2"

model = SentenceTransformer(MODEL_NAME)

In [4]:
# Free-text fields to embed: the CV description and the job posting text.
cv_texts = cv_df["description"].tolist()
job_texts = job_df["text"].tolist()

# Row i of each embedding array corresponds to row i of its source frame.
# normalize_embeddings=True requests unit-length vectors from the encoder.
cv_embeddings = model.encode(cv_texts, normalize_embeddings=True)
job_embeddings = model.encode(job_texts, normalize_embeddings=True)

##### 3. Build all matched pairs (same ROLE)

In [5]:
# The id -> row-position maps below keep only the last row for a repeated id,
# which would silently attach the wrong embedding/metadata to earlier rows.
if not cv_df["cv_id"].is_unique:
    raise ValueError("cv_records.csv contains duplicate cv_id values")
if not job_df["job_id"].is_unique:
    raise ValueError("job_records.csv contains duplicate job_id values")

# Map each record id to its row position; the same position addresses the
# matching row of cv_embeddings / job_embeddings.
cv_id_to_idx = dict(zip(cv_df["cv_id"], range(len(cv_df))))
job_id_to_idx = dict(zip(job_df["job_id"], range(len(job_df))))

# Group job ids by role once, instead of re-filtering job_df for every CV.
# sort=False keeps roles in file order and rows keep their order within a
# role; groupby drops missing roles, so a CV without a role matches nothing
# (the same outcome as an equality filter on NaN).
job_ids_by_role = {
    role: job_ids.tolist()
    for role, job_ids in job_df.groupby("role", sort=False)["job_id"]
}

# Every CV is paired with all jobs of the same role
# (domain is only on job_records.csv, so it cannot be used for matching).
pairs = [  # (cv_id, job_id)
    (cv_id, job_id)
    for cv_id, role in zip(cv_df["cv_id"], cv_df["role"])
    for job_id in job_ids_by_role.get(role, [])
]

print(f"Total matched pairs: {len(pairs)}")

Total matched pairs: 11520


##### 4. Compute similarity only for matched pairs

In [6]:
# Row positions (not index labels) of each pair's CV and job posting. The same
# positions address the rows of cv_embeddings / job_embeddings.
n_pairs = len(pairs)
cv_pos = np.fromiter(
    (cv_id_to_idx[cv_id] for cv_id, _ in pairs), dtype=np.intp, count=n_pairs
)
job_pos = np.fromiter(
    (job_id_to_idx[job_id] for _, job_id in pairs), dtype=np.intp, count=n_pairs
)

if n_pairs:
    # One batched call gives the full CV x job cosine-similarity matrix, which
    # replaces a separate tensor round-trip for every pair. The cast to float64
    # keeps the CSV at the precision the former per-pair `.item()` call gave.
    sim_matrix = (
        util.cos_sim(cv_embeddings, job_embeddings).cpu().numpy().astype(np.float64)
    )
    similarity = sim_matrix[cv_pos, job_pos]
else:
    # Nothing matched: still write a CSV that carries the header row.
    similarity = np.empty(0, dtype=np.float64)

# Positional selection (.iloc) stays correct even if the frames do not carry
# a default 0..n-1 index; reset_index lines the two selections up row by row.
cv_rows = cv_df.iloc[cv_pos].reset_index(drop=True)
job_rows = job_df.iloc[job_pos].reset_index(drop=True)

out_df = pd.DataFrame({
    "cv_id": [cv_id for cv_id, _ in pairs],
    "name": cv_rows["name"],
    "gender": cv_rows["gender"],
    "ethnicity": cv_rows["ethnicity"],
    "role": cv_rows["role"],

    "job_id": [job_id for _, job_id in pairs],
    "job_role": job_rows["role"],
    # "domain" is optional in job_records.csv; the column is left empty without it.
    "job_domain": job_rows["domain"] if "domain" in job_rows.columns else None,
    "job_level": job_rows["level"],
    "job_text": job_rows["text"],

    "similarity": similarity,
})
out_df.to_csv("cv_job_similarity.csv", index=False)

print("Saved → cv_job_similarity.csv")
out_df.head()

Saved → cv_job_similarity.csv


Unnamed: 0,cv_id,name,gender,ethnicity,role,job_id,job_role,job_domain,job_level,job_text,similarity
0,CV_0,Ahmed Hassan,male,arabic_middle_eastern,Software Engineer,JOB_0,Software Engineer,tech,Junior,Hiring a Junior Software Engineer to implement...,0.549975
1,CV_0,Ahmed Hassan,male,arabic_middle_eastern,Software Engineer,JOB_1,Software Engineer,tech,Junior,Junior Software Engineer role focused on contr...,0.595809
2,CV_0,Ahmed Hassan,male,arabic_middle_eastern,Software Engineer,JOB_2,Software Engineer,tech,Mid,"Seeking a Software Engineer to design APIs, ma...",0.661968
3,CV_0,Ahmed Hassan,male,arabic_middle_eastern,Software Engineer,JOB_3,Software Engineer,tech,Mid,Software Engineer needed to work on applicatio...,0.619108
4,CV_0,Ahmed Hassan,male,arabic_middle_eastern,Software Engineer,JOB_4,Software Engineer,tech,Senior,Senior Software Engineer role involving owners...,0.582217
