In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import LabelEncoder
from google import genai

In [3]:
KEY = "AIzaSyC_LY9Bqqc6KYt35PMEU8Z5HgV1Fs9DAng"
client = genai.Client(api_key=KEY)

In [None]:
def get_similarity_score(resume_text, job_description):
    """
    Query the LLM to get a similarity score between resume and job description.
    Returns a value from 1-10 indicating how well the resume fits the job.
    """
    
    prompt = f"""Analyze the match between this RESUME and JOB DESCRIPTION. 
    As a Human Resources recruiter, your task is to evaluate whether the professional field of the candidate aligns with that of the job description.
    
    RESUME:
    {resume_text}
    
    JOB DESCRIPTION:
    {job_description}
    
    Given a resume and a job description, assign an integer score from 1 to 10 indicating how well the resume matches the position.
    
    Provide only a single integer score representing how well this resume matches the job requirements.
    Don't provide any explanation, just return the integer score between 1 and 10.
    """
    
    response = client.models.generate_content(
        model = "gemini-2.0-flash",
        contents= prompt,
    )

    content = response.text
    return content

In [5]:
df_resumes = pd.read_csv('../../PreProcessingResumes/processed_data/Resume.csv')
df_jobs = pd.read_csv('../../PreProcessingJobs/processed_data/JobDescription.csv')

# Convert resumes column to a list
resumes = df_resumes["Resume_str"].tolist()
    
# Encode category labels as integers
encoder = LabelEncoder()
labels = encoder.fit_transform(df_resumes["Category"])
category_names = encoder.classes_.tolist()

# Load the SBERT model for generating sentence embeddings
sbert_model = SentenceTransformer('all-MiniLM-L12-v2')

resumes_embed = sbert_model.encode(resumes, show_progress_bar=True)

Batches:   0%|          | 0/78 [00:00<?, ?it/s]

In [6]:
# job_sample = df_jobs[df_jobs['job_id'] == 3904362263].iloc[0]           # COSINE WORKS VERY BAD



job_sample = df_jobs.sample(n=1).iloc[0]
# job_sample = df_jobs[df_jobs['job_id'] == 3903825174].iloc[0]
job_desc = job_sample['description']

job_embed = sbert_model.encode([job_desc], show_progress_bar=True)

similarity_vector = cosine_similarity(job_embed, resumes_embed).flatten()

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
top_n = 5  # Number of top matches to retrieve

# Get indices of top_n resumes with highest similarity scores
top_matches = similarity_vector.argsort()[::-1][:top_n]

results = []
for rank, cv_idx in enumerate(top_matches, start=1):
    
    score = get_similarity_score(df_resumes.iloc[cv_idx]['Resume_str'], job_sample['description'])
    
    results.append({
        'rank':             rank,
        'job_id':           job_sample['job_id'],
        'job_title':        job_sample['title'],
        'cv_id':            df_resumes.iloc[cv_idx]['ID'],      
        'cv_category':      df_resumes.iloc[cv_idx]['Category'],
        'similarity_score': similarity_vector[cv_idx] ,
        'gemini_score':     score
    })

df_matches = pd.DataFrame(results)
df_matches

Unnamed: 0,rank,job_id,job_title,cv_id,cv_category,similarity_score,gemini_score
0,1,3903825174,Branch Banking Client Consultant I (Part Time),28895997,BANKING,0.698192,7\n
1,2,3903825174,Branch Banking Client Consultant I (Part Time),13982572,BANKING,0.6696,8\n
2,3,3903825174,Branch Banking Client Consultant I (Part Time),45167858,BANKING,0.65882,8\n
3,4,3903825174,Branch Banking Client Consultant I (Part Time),74552449,BANKING,0.654799,8\n
4,5,3903825174,Branch Banking Client Consultant I (Part Time),54421668,BANKING,0.648656,7\n


In [8]:
resume_sample = df_resumes[df_resumes['ID'] == 35121930].iloc[0] 
resume_sample['Resume_str']

'manufacturing technician ops coordinator professional summary to apply my current education to obtain placement in an agency that provides services to clients who are seeking treatment for substance use abuse intimate partner violence and ptsd professional experience to company name volunteered at albuquerque fashion week i worked in a booth that displayed business like clothes and shoes for homeless women and men to reintegrate them into the workforce walking for chiari june help organize a charity walk that raised money for individuals living with chiari master practicum perfectly imperfect hours iop substance use abuse treatment program using the matrix modality hrt treatment using the wexler model to treat clients that are referred to the agency for intimate partner violence seeking safety treatment program that is geared towards working with individuals who are experiencing possible violence ptsd and substance use abuse worked with clients in a group setting helped clients identi

In [9]:
job_sample['description']

'position title branch banking client consultant i part time location bedford stuyvesant job summary a branch banking client consultant i provides customers with high quality prompt and professional financial services responsible for interacting with clients and providing exceptional service by greeting them as they enter one of our branches and providing them with the services needed to meet their financial goals this role will process financial transactions in an accurate efficient and friendly manner while maintaining operational standards and assist the team in meeting branch goals pay range pay range local minimum wage job responsibilities sales performance support team efforts to achieve growth targets in financial performance outstanding deposits outstanding loans non interest income and primary client acquisition actively seek and deliver the right client introductions to the right team member s to achieve growth targets and execute successful sales initiativeclient experience 

In [10]:
print(f"""
    Analyze the match between this RESUME and JOB DESCRIPTION. 
    As a Human Resources recruiter, your task is to evaluate whether the professional field of the candidate aligns with that of the job description.
    
    RESUME:
    {resume_sample['Resume_str']}
    
    JOB DESCRIPTION:
    {job_sample['description']}
    
    Given a resume and a job description, assign an integer score from 1 to 10 indicating how well the resume matches the position.
    Ignore experience, specific skills, or seniority level — focus only on the alignment of the professional domain.

    Provide a single integer score from 1 to 10 representing how well this resume matches the job requirements,
    followed by a short explanation of why you gave that score.
    """)


    Analyze the match between this RESUME and JOB DESCRIPTION. 
    As a Human Resources recruiter, your task is to evaluate whether the professional field of the candidate aligns with that of the job description.

    RESUME:
    manufacturing technician ops coordinator professional summary to apply my current education to obtain placement in an agency that provides services to clients who are seeking treatment for substance use abuse intimate partner violence and ptsd professional experience to company name volunteered at albuquerque fashion week i worked in a booth that displayed business like clothes and shoes for homeless women and men to reintegrate them into the workforce walking for chiari june help organize a charity walk that raised money for individuals living with chiari master practicum perfectly imperfect hours iop substance use abuse treatment program using the matrix modality hrt treatment using the wexler model to treat clients that are referred to the agency for inti