In [None]:
# STEP 1: Install required packages
!pip install -q sentence-transformers PyPDF2 scikit-learn pandas

# STEP 2: Upload JD CSV and resume PDFs
from google.colab import files
import os

# Ask for the job-description CSV first; the upload result is kept in `jd_file`.
print("üìÅ Upload your job_description.csv file:")
jd_file = files.upload()

# Then ask for the resume PDFs; the result is iterated below as filename -> content.
print("\nüìÑ Upload your resume PDF files:")
resume_files = files.upload()

# Persist every uploaded resume under ./resumes (created on first use).
os.makedirs("resumes", exist_ok=True)
for filename, payload in resume_files.items():
    with open(f"resumes/{filename}", "wb") as resume_out:
        resume_out.write(payload)


In [None]:
# STEP 3: Read and summarize JDs with Titles
import pandas as pd

# Load the uploaded job-description CSV (the first uploaded filename is used).
jd_df = pd.read_csv(next(iter(jd_file)))
print("üìÑ Job Descriptions Loaded:\n", jd_df.head())

# Identify title & description columns by case-insensitive substring match.
# Fail with an explicit message instead of a bare IndexError when one is missing.
title_col = next((col for col in jd_df.columns if 'title' in str(col).lower()), None)
desc_col = next((col for col in jd_df.columns if 'description' in str(col).lower()), None)
if title_col is None or desc_col is None:
    raise ValueError(
        "The job description CSV needs one column whose name contains 'title' "
        f"and one whose name contains 'description'; found: {list(jd_df.columns)}"
    )

# Combine both for better semantic embedding.
# Missing values are filled per column BEFORE concatenating: NaN + str is NaN,
# so filling afterwards would blank the whole text whenever either field is
# missing and silently discard the field that was present.
title_text = jd_df[title_col].fillna("").astype(str)
desc_text = jd_df[desc_col].fillna("").astype(str)
combined_jds = (title_text + " - " + desc_text).tolist()
job_titles = jd_df[title_col].fillna("Unknown Role").tolist()


In [None]:
# STEP 4: Extract text from resumes
import PyPDF2

def extract_text_from_pdf(file_path):
    """Return the text of all pages of a PDF, concatenated in page order.

    Args:
        file_path: Path of the PDF; it is opened in binary mode and handed to
            ``PyPDF2.PdfReader``.

    Returns:
        A single string. A page whose ``extract_text()`` result is falsy
        (e.g. ``None``) contributes an empty string.
    """
    with open(file_path, "rb") as pdf_stream:
        pdf_reader = PyPDF2.PdfReader(pdf_stream)
        # Extraction happens while the file is still open.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    return "".join(page_texts)

# Parallel lists: resumes_text[k] is the extracted text of resume_names[k].
resumes_text = []
resume_names = []

# sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
# resume order (and therefore the printed results) stable between runs.
for file in sorted(os.listdir("resumes")):
    # Case-insensitive check so files named e.g. "CV.PDF" are not ignored.
    if not file.lower().endswith(".pdf"):
        continue

    resume_text = extract_text_from_pdf(f"resumes/{file}")

    # A PDF with no extractable text (for example a scanned image) would be
    # embedded as an empty string and receive a meaningless score; skip it
    # and say so instead.
    if not resume_text.strip():
        print(f"Skipping {file}: no extractable text found")
        continue

    resumes_text.append(resume_text)
    resume_names.append(file)


In [None]:
from sentence_transformers import SentenceTransformer, util

# Fail early with a clear message: encoding an empty list and taking cosine
# similarities against it produces confusing downstream errors.
if not combined_jds:
    raise ValueError("No job descriptions to match; check the uploaded CSV.")
if not resumes_text:
    raise ValueError("No resume text to match; check the uploaded PDF files.")

print("üìä Embedding JDs and Resumes...\n")
model = SentenceTransformer('all-MiniLM-L6-v2')

jd_embeddings = model.encode(combined_jds, convert_to_tensor=True)
resume_embeddings = model.encode(resumes_text, convert_to_tensor=True)

print("üìä Matching Resumes with JDs...\n")
threshold = 0.40  # You can adjust this

shortlist = []  # For console viewing: one flat record per (JD, resume) match
matches = {}    # For Mistral or automation: job title -> matches incl. resume text

for i, combined_jd in enumerate(combined_jds):
    print(f"\nüîπ JD {i+1} ({job_titles[i]}):\n{combined_jd[:100]}...\n")
    # Similarity of this JD against every resume; row 0 is the only row.
    scores = util.cos_sim(jd_embeddings[i], resume_embeddings)[0]

    jd_matches = []  # Store match info for current JD

    # tolist() converts the scores to plain Python floats once, so the same
    # value is used for the comparison, the printout and the stored records.
    for j, score_value in enumerate(scores.tolist()):
        if score_value >= threshold:
            print(f"‚úÖ {resume_names[j]} - Match Score: {score_value:.2f}")
            shortlist.append({
                "resume": resume_names[j],
                "job_title": job_titles[i],
                "score": score_value
            })
            # Only accepted candidates need the (large) resume text attached.
            jd_matches.append({
                "resume": resume_names[j],
                "score": score_value,
                "resume_text": resumes_text[j]
            })
        else:
            print(f"‚ùå {resume_names[j]} - Match Score: {score_value:.2f}")

    # Extend rather than assign: two JDs may share a title, and plain
    # assignment would drop the matches of the earlier one. This also agrees
    # with how `shortlist` is later grouped by job title.
    matches.setdefault(job_titles[i], []).extend(jd_matches)


In [None]:
from collections import defaultdict

print("üìä Displaying Top 5 Resumes for Each Job Description...\n")

# Group results by JD.
# A dedicated name is used here on purpose: rebinding `matches` would destroy
# the `matches` dict built by the matching cell, which also carries each
# candidate's resume text.
shortlist_by_title = defaultdict(list)
for entry in shortlist:
    shortlist_by_title[entry["job_title"]].append({
        "resume": entry["resume"],
        "score": entry["score"]
    })

# Sort and display top 5
for jd_title, candidates in shortlist_by_title.items():
    print(f"\nüß† Job Title: {jd_title}\n" + "-"*50)
    # Highest score first; keep at most five candidates per title.
    top_matches = sorted(candidates, key=lambda x: x["score"], reverse=True)[:5]

    for idx, candidate in enumerate(top_matches, start=1):
        print(f"{idx}. {candidate['resume']} - Score: {candidate['score']:.2f}")


In [None]:
import sqlite3

# Open matches.db (SQLite creates the file when it does not exist yet) and
# keep a cursor around for the cells that follow.
conn = sqlite3.connect("matches.db")
cursor = conn.cursor()

# Make sure the table holding the matches exists. Using the connection as a
# context manager commits once the statement has run without error.
with conn:
    cursor.execute("""
CREATE TABLE IF NOT EXISTS resume_matches (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    job_title TEXT,
    resume_name TEXT,
    score REAL
)
""")


In [None]:
# Insert shortlisted matches into DB.
# The table has no uniqueness constraint, so running this cell again would
# append the same rows a second time and the "top 5" queries would list one
# resume several times. To keep the cell idempotent, any row already stored
# for a (job_title, resume_name) pair is removed before the fresh rows are
# inserted.
rows = [(entry["job_title"], entry["resume"], entry["score"]) for entry in shortlist]

# The connection context manager commits on success and rolls back if any
# statement fails, so the delete and the insert are applied together.
with conn:
    cursor.executemany(
        "DELETE FROM resume_matches WHERE job_title = ? AND resume_name = ?",
        [(title, resume_name) for title, resume_name, _ in rows],
    )
    cursor.executemany("""
    INSERT INTO resume_matches (job_title, resume_name, score)
    VALUES (?, ?, ?)
    """, rows)


In [None]:
# Every distinct job title currently stored. The rows are fetched up front
# because the same cursor is reused for the per-title query below.
job_titles_in_db = cursor.execute("SELECT DISTINCT job_title FROM resume_matches").fetchall()

for title_row in job_titles_in_db:
    job_title = title_row[0]
    print(f"\nüß† Job Title: {job_title}\n" + "-"*50)

    # Five best-scoring resumes for this title, highest score first.
    cursor.execute("""
    SELECT resume_name, score
    FROM resume_matches
    WHERE job_title = ?
    ORDER BY score DESC
    LIMIT 5
    """, (job_title,))

    rank = 0
    for resume, score in cursor.fetchall():
        rank += 1
        print(f"{rank}. {resume} - Score: {score:.2f}")


In [None]:
import sqlite3

# Use a dedicated, short-lived connection for this lookup. Rebinding the shared
# `conn`/`cursor` names and closing them here would leave them closed, so
# re-running the insert or display cells afterwards would fail.
lookup_conn = sqlite3.connect("matches.db")
try:
    # Surrounding whitespace typed by the user is never part of a title.
    job_title = input("Enter Job Title: ").strip()

    # Fetch top 5 resumes for that job title. The comparison ignores
    # surrounding whitespace in the stored title and (ASCII) letter case, so
    # the title does not have to be typed exactly as stored.
    results = lookup_conn.execute("""
SELECT resume_name, score
FROM resume_matches
WHERE TRIM(job_title) = ? COLLATE NOCASE
ORDER BY score DESC
LIMIT 5
""", (job_title,)).fetchall()
finally:
    # Always release the connection, even when input() or the query raises.
    lookup_conn.close()

print(f"\nTop 5 resumes for '{job_title}':")
if not results:
    print("No stored matches for this job title.")
for i, (resume, score) in enumerate(results, start=1):
    print(f"{i}. {resume} - Score: {score:.2f}")


In [None]:
from collections import defaultdict

# Bucket the shortlist records by their job title.
top_matches_by_jd = defaultdict(list)
for shortlisted in shortlist:
    top_matches_by_jd[shortlisted['job_title']].append(shortlisted)

# Order each bucket by descending score and keep only its five best entries.
for title_entries in top_matches_by_jd.values():
    title_entries.sort(key=lambda record: record['score'], reverse=True)
    del title_entries[5:]


In [None]:
def generate_interview_email(candidate_name, job_title, interview_date, interview_time, mode):
    """Build the plain-text interview invitation for one candidate.

    Args:
        candidate_name: Name used in the salutation.
        job_title: Role named in the subject line and in the body.
        interview_date: Date of the interview, inserted as given.
        interview_time: Time of the interview, inserted as given.
        mode: How the interview is conducted (inserted before the word "mode").

    Returns:
        The message as one string, starting with the ``Subject:`` line and
        ending with a trailing newline.
    """
    message_lines = [
        f"Subject: Interview Invitation for {job_title} Role",
        "",
        f"Dear {candidate_name},",
        "",
        f"We are pleased to inform you that you have been shortlisted for the {job_title} position based on your resume.",
        "",
        f"We would like to invite you for an interview scheduled on {interview_date} at {interview_time}, to be conducted in {mode} mode.",
        "",
        "Kindly confirm your availability by replying to this email. We look forward to speaking with you.",
        "",
        "Best regards,",
        "Recruitment Team",
        "AutoHire AI",
        "",  # yields the trailing newline after the signature
    ]
    return "\n".join(message_lines)

# Example: generating for top 5 of a JD
from datetime import datetime, timedelta

# Interview slot used for every invitation: two days from today, fixed time and mode.
interview_date = (datetime.today() + timedelta(days=2)).strftime("%B %d, %Y")
interview_time = "11:00 AM"
mode = "online"

# The loop variable is named `candidates` rather than `matches` so that it
# does not rebind the notebook-level `matches` dict built by the matching cell.
for jd_title, candidates in top_matches_by_jd.items():
    print(f"\nüì© Emails for Job Title: {jd_title}\n" + "-"*50)
    for candidate in candidates:
        # Derive a display name from the resume filename. splitext removes
        # only the trailing extension (whatever its letter case), whereas
        # replace(".pdf", "") also removed ".pdf" from the middle of a name.
        name = os.path.splitext(candidate["resume"])[0].replace("_", " ").title()
        email = generate_interview_email(name, jd_title, interview_date, interview_time, mode)
        print(email)
        print("-" * 50)
