I will compare three approaches to matching CVs against job descriptions:

1. TF-IDF + cosine similarity
2. SBERT
3. OpenAI

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read both input tables in one pass: parsed CV text first, scraped job postings second.
df_cvs, df_jobs = (
    pd.read_csv(csv_path)
    for csv_path in ("parsed_cvs.csv", "job_scrapped.csv")
)

In [6]:
# Score every CV against every job posting with TF-IDF + cosine similarity,
# then save the CV x job score table to cv_job_similarity_scores.csv.

# Lowercase the raw text columns in place. TfidfVectorizer already lowercases
# by default, so this is kept only so the frames themselves stay lowercased
# for any later cell that reads them.
df_cvs['cv_text'] = df_cvs['cv_text'].str.lower()
df_jobs['Description'] = df_jobs['Description'].str.lower()

# Collect the documents as plain lists. Missing text (NaN) is replaced with an
# empty string: TfidfVectorizer rejects non-string documents, so a single
# empty CV or description would otherwise abort the whole cell. An empty
# document yields an all-zero vector and therefore a similarity of 0.
job_texts = df_jobs["Description"].fillna("").tolist()
cv_texts = df_cvs["cv_text"].fillna("").tolist()

# Initialize TF-IDF Vectorizer
vectorizer = TfidfVectorizer()

# Fit on jobs and CVs together so both share one vocabulary and one set of
# IDF weights; jobs occupy the first len(job_texts) rows of the matrix.
tfidf_matrix = vectorizer.fit_transform(job_texts + cv_texts)

# Split back into job and CV vectors
n_job_docs = len(job_texts)
job_vectors = tfidf_matrix[:n_job_docs]
cv_vectors = tfidf_matrix[n_job_docs:]

# Compute cosine similarity: rows are CVs, columns are job descriptions
similarity_matrix = cosine_similarity(cv_vectors, job_vectors)

# Label rows by CV filename and columns by job title.
# NOTE(review): job titles are not unique in this data, so when this CSV is
# read back with pd.read_csv the repeated titles come back as "Title.1",
# "Title.2", ... — consider a unique job identifier for the column labels.
similarity_df = pd.DataFrame(similarity_matrix, index=df_cvs["filename"], columns=df_jobs["Job Title"])

# Save results
similarity_df.to_csv("cv_job_similarity_scores.csv")

# Show results
print(similarity_df.head())

Job Title                          Software Engineer, AI Intern (Summer 2025)  ...  Data Scientist (L5) - Product Promotion & Algorithm Performance
filename                                                                       ...                                                                 
Abiral_Pandey_Fullstack_Java.docx                                    0.262686  ...                                           0.305367              
Achyuth Resume_8.docx                                                0.272755  ...                                           0.323314              
Adelina_Erimia_PMP1.docx                                             0.227668  ...                                           0.284344              
Adhi Gopalam - SM.docx                                               0.311761  ...                                           0.367408              
AjayKumar.docx                                                       0.328151  ...                              

In [7]:
# Pick the best-scoring CV for every job from the saved similarity table and
# write the result to top_matched_cvs.csv.

# Load similarity scores: rows are CV filenames (first CSV column), columns are jobs
similarity_df = pd.read_csv("cv_job_similarity_scores.csv", index_col=0)

# For each job column take the row label of the maximum score and the score
# itself. This replaces a full per-column sort that was only used to read the
# first element; on ties idxmax deterministically returns the first CV in
# file order.
df_top_matches = pd.DataFrame(
    {
        "Top CV": similarity_df.idxmax(axis=0),
        "Similarity Score": similarity_df.max(axis=0),
    }
)

# Same job -> {"Top CV", "Similarity Score"} mapping as before, kept under its
# original name for any later cell that reads it.
top_matches = df_top_matches.to_dict(orient="index")

# Save results
df_top_matches.to_csv("top_matched_cvs.csv")

# Show results
print(df_top_matches)

                                                                            Top CV  Similarity Score
Software Engineer, AI Intern (Summer 2025)          Srivatsan_Project_Manager.docx          0.341067
Software Engineer, AI Intern (Summer 2025).1        Srivatsan_Project_Manager.docx          0.341067
Full-Stack Software Engineer (New graduates: Un...          Komala BSA Resume.docx          0.414860
Software Engineer I/II                                    Siddhartha Gandroju.docx          0.315599
Software Engineer                                           Komala BSA Resume.docx          0.349139
Software Engineer.1                                 Srivatsan_Project_Manager.docx          0.527066
Software Engineer.2                                 Srivatsan_Project_Manager.docx          0.373392
Software Engineer - New Grad                                Komala BSA Resume.docx          0.443737
Software Engineer.3                                 Srivatsan_Project_Manager.docx         