# Module 4 – Resume Similarity & Ranking

This notebook loads the TF-IDF vectors generated in Module 3
and computes cosine similarity scores between each job description
and every resume. Resumes are then ranked by relevance for each job.


# Import Libraries

In [34]:
import pickle
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load TF-IDF Output

In [38]:
# SECURITY: pickle.load can execute arbitrary code while deserializing.
# Only load this file if it was produced by a trusted Module 3 run.
with open("module_3_feature_extraction/tfidf_vectors.pkl", "rb") as f:
    tfidf_data = pickle.load(f)

# Fail early, with a readable message, if the payload lacks an expected entry.
missing_keys = [key for key in ("jd_vectors", "resume_vectors") if key not in tfidf_data]
if missing_keys:
    raise KeyError(f"tfidf_vectors.pkl is missing expected keys: {missing_keys}")

# Extract job and resume vectors
job_vectors = tfidf_data["jd_vectors"]
resume_vectors = tfidf_data["resume_vectors"]

# Cosine similarity needs both matrices to have the same number of feature
# columns; check here so a mismatch is reported next to the load step.
if job_vectors.shape[1] != resume_vectors.shape[1]:
    raise ValueError(
        "Feature dimension mismatch: job vectors have "
        f"{job_vectors.shape[1]} columns but resume vectors have "
        f"{resume_vectors.shape[1]}"
    )

print("Job vectors shape:", job_vectors.shape)
print("Resume vectors shape:", resume_vectors.shape)


Job vectors shape: (10, 5000)
Resume vectors shape: (100, 5000)


# Cosine Similarity

In [39]:
# Pairwise cosine similarity: one row per job vector, one column per resume vector.
similarity_matrix = cosine_similarity(X=job_vectors, Y=resume_vectors)
print("Similarity matrix shape:", similarity_matrix.shape)

Similarity matrix shape: (10, 100)


# Rank Resumes based on Similarity

In [43]:
# numpy is already imported as `np` in the imports cell at the top of the notebook.

top_n = 5  # top N resumes per job
ranking_results = {}

# Build, for every job row of similarity_matrix, the top_n best-matching resumes.
# Job labels are 1-based ("Job_1", ...), while Resume_IDs are the 0-based
# column indices of similarity_matrix.
for job_idx, scores in enumerate(similarity_matrix):
    # Sort the negated scores with a stable algorithm: the result is in
    # descending score order, and tied scores are ranked by ascending resume
    # index, so the ranking is reproducible. Reversing an unstable argsort
    # leaves the order of ties implementation-dependent.
    ranked_indices = np.argsort(-scores, kind="stable")
    top_resumes = ranked_indices[:top_n]
    top_scores = scores[top_resumes]

    ranking_results[f"Job_{job_idx + 1}"] = {
        "Resume_IDs": top_resumes,
        "Similarity_Scores": top_scores,
    }

# Display top resumes for Job_1 (skipped gracefully when there are no jobs).
if "Job_1" in ranking_results:
    print("Top resumes for Job_1:")
    job_1 = ranking_results["Job_1"]
    for i, (rid, score) in enumerate(zip(job_1["Resume_IDs"], job_1["Similarity_Scores"]), 1):
        print(f"Rank {i}: Resume {rid} — Similarity: {score:.4f}")
else:
    print("No jobs found in similarity_matrix; nothing to rank.")


Top resumes for Job_1:
Rank 1: Resume 79 — Similarity: 0.1387
Rank 2: Resume 16 — Similarity: 0.1296
Rank 3: Resume 35 — Similarity: 0.1139
Rank 4: Resume 70 — Similarity: 0.0996
Rank 5: Resume 92 — Similarity: 0.0757


# Display Rankings for All Jobs

In [44]:
# Print the ranked resumes for every job stored in ranking_results.
for job_name, ranking in ranking_results.items():
    print(f"\n=== {job_name} ===")
    ranked_pairs = zip(ranking["Resume_IDs"], ranking["Similarity_Scores"])
    for position, (resume_id, score) in enumerate(ranked_pairs, start=1):
        print(f"Rank {position}: Resume {resume_id} — Similarity: {score:.4f}")



=== Job_1 ===
Rank 1: Resume 79 — Similarity: 0.1387
Rank 2: Resume 16 — Similarity: 0.1296
Rank 3: Resume 35 — Similarity: 0.1139
Rank 4: Resume 70 — Similarity: 0.0996
Rank 5: Resume 92 — Similarity: 0.0757

=== Job_2 ===
Rank 1: Resume 28 — Similarity: 0.1405
Rank 2: Resume 92 — Similarity: 0.0966
Rank 3: Resume 35 — Similarity: 0.0906
Rank 4: Resume 79 — Similarity: 0.0737
Rank 5: Resume 76 — Similarity: 0.0710

=== Job_3 ===
Rank 1: Resume 79 — Similarity: 0.0961
Rank 2: Resume 34 — Similarity: 0.0804
Rank 3: Resume 40 — Similarity: 0.0763
Rank 4: Resume 87 — Similarity: 0.0721
Rank 5: Resume 16 — Similarity: 0.0641

=== Job_4 ===
Rank 1: Resume 28 — Similarity: 0.1250
Rank 2: Resume 9 — Similarity: 0.1174
Rank 3: Resume 20 — Similarity: 0.1062
Rank 4: Resume 22 — Similarity: 0.0917
Rank 5: Resume 79 — Similarity: 0.0877

=== Job_5 ===
Rank 1: Resume 43 — Similarity: 0.0580
Rank 2: Resume 35 — Similarity: 0.0545
Rank 3: Resume 41 — Similarity: 0.0542
Rank 4: Resume 92 — Similarit

# Save Output File as CSV

In [45]:
import pandas as pd

# Flatten ranking_results into one record per (job, rank) pair; ranks start at 1.
ranking_rows = [
    {
        "Job": job,
        "Rank": rank,
        "Resume_ID": rid,
        "Similarity_Score": score,
    }
    for job, data in ranking_results.items()
    for rank, (rid, score) in enumerate(
        zip(data["Resume_IDs"], data["Similarity_Scores"]), start=1
    )
]
df_results = pd.DataFrame(ranking_rows)

# Persist the table (without the index column) for the Module 5 evaluation step.
df_results.to_csv("module5_resume_ranking.csv", index=False)
print("✅ Rankings saved as CSV for Module 5: 'module5_resume_ranking.csv'")


✅ Rankings saved as CSV for Module 5: 'module5_resume_ranking.csv'
