In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import LabelEncoder
import textwrap

In [4]:
# Read the preprocessed resumes CSV and preview its first rows.
resumes_csv_path = '../PreProcessingResumes/processed_data/Resumes.csv'
df_resumes = pd.read_csv(resumes_csv_path)
df_resumes.head()

Unnamed: 0,ID,Resume_str,Category
0,28111403,president chief executive officer executive pr...,INFORMATION-TECHNOLOGY
1,29908929,business development marketing summary busines...,BUSINESS-DEVELOPMENT
2,15261348,chef and restaurant manager summary chef with ...,CHEF
3,75435017,charge nurse professional summary objective to...,HEALTHCARE
4,41152404,test analyst intern contractor profile years o...,INFORMATION-TECHNOLOGY


In [5]:
# Print a summary of the resume frame: index range, column dtypes, non-null counts and memory usage.
df_resumes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2483 entries, 0 to 2482
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   ID          2483 non-null   int64 
 1   Resume_str  2483 non-null   object
 2   Category    2483 non-null   object
dtypes: int64(1), object(2)
memory usage: 58.3+ KB


In [6]:
# Read the preprocessed job descriptions CSV and preview its first rows.
jobs_csv_path = '../PreProcessingJobs/processed_data/JobDescription.csv'
df_jobs = pd.read_csv(jobs_csv_path)
df_jobs.head()

Unnamed: 0,job_id,title,description
0,921716,Marketing Coordinator,job descriptiona leading real estate firm in n...
1,1829192,Mental Health Therapist/Counselor,at aspen therapy and wellness we are committed...
2,10998357,Assitant Restaurant Manager,the national exemplar is accepting application...
3,23221523,Senior Elder Law / Trusts and Estates Associat...,senior associate attorney elder law trusts and...
4,91700727,Economic Development and Planning Intern,job summary the economic development planning ...


In [7]:
# Print a summary of the job postings frame: index range, column dtypes, non-null counts and memory usage.
df_jobs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122545 entries, 0 to 122544
Data columns (total 3 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   job_id       122545 non-null  int64 
 1   title        122545 non-null  object
 2   description  122545 non-null  object
dtypes: int64(1), object(2)
memory usage: 2.8+ MB


### Resume & Job Description Matching System

In [8]:
# Fit a label encoder on the Category column and keep the encoded labels
encoder = LabelEncoder()
labels = encoder.fit_transform(df_resumes["Category"])

# Plain Python list of the resume texts, in DataFrame row order
resumes = df_resumes["Resume_str"].to_list()

In [9]:
# Sentence-embedding model shared by the resume and job-description encoding steps
sbert_model_name = 'all-MiniLM-L12-v2'
sbert_model = SentenceTransformer(sbert_model_name)

In [10]:
# Encode every resume text with the SBERT model (progress bar enabled).
# The result is later compared against the job embedding with cosine similarity.
# NOTE(review): presumably one embedding row per resume, in df_resumes row order — confirm.
resumes_embed = sbert_model.encode(resumes, show_progress_bar=True)

Batches:   0%|          | 0/78 [00:00<?, ?it/s]

### Test: match resumes to a single job posting

In [None]:
# Optional alternative to the job_id lookup cell: uncomment the two
# assignments below to match against one randomly sampled job posting.

# Draw a single random row from df_jobs
#job_sample = df_jobs.sample(n=1).iloc[0]

#job_desc = job_sample['description']

In [None]:
# Select a specific job posting by job_id.
# (To test with a random posting instead, comment this cell out and enable
# the random-sampling cell.)

JOB_ID = 3895205640  # job_id of the posting to match resumes against

# Filter first so a missing id can be reported clearly; calling .iloc[0] on an
# empty frame fails with an opaque "single positional indexer is out-of-bounds".
job_rows = df_jobs[df_jobs['job_id'] == JOB_ID]
if job_rows.empty:
    raise IndexError(f"No job posting with job_id={JOB_ID} found in df_jobs")

# First matching row as a Series, and the description text used for matching.
job_sample = job_rows.iloc[0]
job_desc = job_sample['description']

In [24]:
# Embed the selected job description; the text is wrapped in a one-element
# list so the model receives it as a batch of one.
job_texts = [job_desc]
job_embed = sbert_model.encode(job_texts, show_progress_bar=True)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [25]:
# Pairwise cosine similarity between the job embedding and the resume embeddings,
# then flattened into a 1-D array of scores.
similarity_matrix = cosine_similarity(job_embed, resumes_embed)
similarity_vector = similarity_matrix.flatten()

In [26]:
top_n = 5  # how many of the best-scoring resumes to keep

# Resume positions sorted from highest to lowest similarity, cut to top_n
ranked_positions = similarity_vector.argsort()[::-1]
top_matches = ranked_positions[:top_n]

# The job fields are the same for every match, so read them once
matched_job_id = job_sample['job_id']
matched_job_title = job_sample['title']

results = []
for rank, cv_idx in enumerate(top_matches, start=1):
    resume_row = df_resumes.iloc[cv_idx]  # positional lookup of the matched resume
    results.append(dict(
        rank=rank,
        job_id=matched_job_id,
        job_title=matched_job_title,
        cv_id=resume_row['ID'],
        cv_category=resume_row['Category'],
        similarity_score=similarity_vector[cv_idx],
    ))

# One row per match, ordered by rank
df_matches = pd.DataFrame(results)
df_matches

Unnamed: 0,rank,job_id,job_title,cv_id,cv_category,similarity_score
0,1,3895205640,"VP, Business Development",27375577,BUSINESS-DEVELOPMENT,0.664728
1,2,3895205640,"VP, Business Development",15233524,BUSINESS-DEVELOPMENT,0.659889
2,3,3895205640,"VP, Business Development",38688388,BUSINESS-DEVELOPMENT,0.65677
3,4,3895205640,"VP, Business Development",24647386,BUSINESS-DEVELOPMENT,0.641772
4,5,3895205640,"VP, Business Development",59696315,BUSINESS-DEVELOPMENT,0.639877


In [27]:
# Show the selected job's description, wrapped to 140 characters per line
description = job_sample['description']
wrapped_description = textwrap.fill(description, width=140)
print(wrapped_description)

long term growth leads to business success and here at phaedon we welcome prospective team members who can help push our vision forward we
re currently searching for an experienced vp of business development who can partner with multiple departments to drive measurable results
to grow our business our ideal candidate will be able to implement an effective sales approach that expands our reach builds our pipeline
and strengthens prospect relationships they will actively seek out business opportunities that can boost revenue and set our company apart
position summary phaedon is seeking a vp business development to work within a strong and innovative team environment on our loyalty team
this position requires loyalty sales and business development experience with building and proactively managing new business non organic
pipelines and negotiating and closing opportunities mission we help our clients simplify complex milestones and ignite brand love bringing
intelligent imagination to solv