In [1]:
import os
import PyPDF2
import pandas as pd
from sentence_transformers import SentenceTransformer, util

In [2]:
model = SentenceTransformer('all-MiniLM-L6-v2')

In [3]:
def extract_text_from_pdf(file_path):
    with open(file_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)
        text = " ".join(page.extract_text() or "" for page in reader.pages)
    return text

In [4]:
def extract_sections(text):
    sections = {
        "skills": "",
        "experience": "",
        "projects": "",
        "other": ""
    }
    current = "other"
    for line in text.splitlines():
        line = line.strip()
        if not line:
            continue
        l = line.lower()
        if "skill" in l:
            current = "skills"
        elif "experience" in l:
            current = "experience"
        elif "project" in l:
            current = "projects"
        sections[current] += line + " "
    return sections


In [14]:
def weighted_score(resume_sections, jd_text):
    jd_emb = model.encode(jd_text, convert_to_tensor=True)
    weights = {
        "skills": 0.1,
        "experience": 0.6,
        "projects": 0.3
    }

    score = 0.0
    for sec, text in resume_sections.items():
        if text.strip() == "":
            continue
        emb = model.encode(text, convert_to_tensor=True)
        sim = util.pytorch_cos_sim(emb, jd_emb).item()
        score += weights.get(sec, 0.0) * sim
    return round(score * 100, 2)

In [15]:
resume_dir = "resumes/"
# jd_path = "JDs/AI_intern_Armada.txt"
jd_path = "JDs/Data_engineering_intern_LiveRamp.txt"

with open(jd_path, 'r', encoding='utf-8') as f:
    jd_text = f.read()

results = []



In [16]:
for file in os.listdir(resume_dir):
    if not file.endswith(".pdf"):
        continue
    fpath = os.path.join(resume_dir, file)
    text = extract_text_from_pdf(fpath)
    sections = extract_sections(text)
    score = weighted_score(sections, jd_text)
    results.append({"Resume": file, "Weighted Score": score})

results_df = pd.DataFrame(results).sort_values("Weighted Score", ascending=False)
results_df.reset_index(drop=True, inplace=True)

In [17]:
## latest
results_df

Unnamed: 0,Resume,Weighted Score
0,Dhruvraj_resume_May18.pdf,42.59
1,Dhruvraj_resume_MSDS.pdf,42.23
2,Dhruvraj_resume_USHunger.pdf,38.99
3,Dhruvraj_resume_Mar11.pdf,38.17
4,Dhruvraj_Resume_intern_rocket.pdf,38.17
5,Dhruvraj_resume_image_analytics.pdf,8.08


In [29]:
import ollama
from math import ceil

def summarize_batch_with_ollama(jd_text, resume_batch):
    batch_prompts = ""
    for idx, resume_text in enumerate(resume_batch, start=1):
        batch_prompts += f"\nResume {idx}:\n\"\"\"{resume_text[:2000]}\"\"\"\n"

    prompt = f"""
You are an AI assistant helping recruiters evaluate candidates by comparing their resumes with a job description. For each resume, provide:

1. A 40-word summary of how the resume aligns with the JD.
2. A bullet list of 5 key relevant skills.
3. A one-line highlight of the candidate’s relevant experience.

Job Description:
\"\"\"{jd_text}\"\"\"

{batch_prompts}

Format output as:

Resume 1:
Summary: ...
Relevant Skills:
- ...
Main Highlight: ...

Resume 2:
...
"""

    try:
        response = ollama.chat(
            model='llama3.2',
            messages=[{"role": "user", "content": prompt}]
        )
        return response['message']['content'].strip()
    except Exception as e:
        return f"Error: {e}"

In [30]:
def summarize_resumes_ollama(jd_text, resume_texts, output_path="resume_summaries.txt"):
    batch_size = 3
    results = []

    for i in range(0, len(resume_texts), batch_size):
        batch = resume_texts[i:i+batch_size]
        result = summarize_batch_with_ollama(jd_text, batch)
        results.append(result)

    all_summary_text = "\n\n".join(results)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(all_summary_text)

    print(f"✅ Summaries saved to {output_path}")
    return all_summary_text


In [31]:
resume_paths = [
    "resumes/Dhruvraj_resume_May18.pdf",
    "resumes/Dhruvraj_resume_MSDS.pdf",
    "resumes/Dhruvraj_resume_USHunger.pdf",
    "resumes/Dhruvraj_Resume_intern_rocket.pdf"
]

resume_texts = [extract_text_from_pdf(path) for path in resume_paths]
jd_text = open("JDs/Data_engineering_intern_LiveRamp.txt", "r", encoding="utf-8").read()

summarize_resumes_ollama(jd_text, resume_texts, "summary_ollama_output.txt")


✅ Summaries saved to summary_ollama_output.txt


"Here is the evaluation of the three resumes against the job description:\n\n**Resume 1**\n\nSummary: The resume aligns with the JD as it showcases Dhruvraj's experience in data pipeline development, ETL processes, and analytics solutions. The candidate has worked on various projects that demonstrate their proficiency in Python, SQL, and big data tools.\n\nRelevant Skills:\n- Data pipeline development\n- ETL processes\n- Analytics solutions\n- Big data tools (PySpark, Hadoop, Kafka)\n- Cloud computing (AWS Suite)\n\nMain Highlight: Dhruvraj's experience as a Data Analyst at Draup Business Solutions Bangalore, where he designed and deployed high-performance ETL pipelines using PySpark and SQL on AWS EMR, improving data integrity by 35%.\n\n**Resume 2**\n\nSummary: The resume aligns with the JD as it highlights Dhruvraj's expertise in machine learning, deep learning, and big data. The candidate has worked on various projects that demonstrate their proficiency in machine learning framewor