# ResumeIQ: AI-Powered Resume Understanding & Q&A

### ResumeIQ is a Python-based AI tool that reads multi-page resumes and extracts structured, recruiter-ready information using large language models (LLMs).

In [1]:
# Step 1: Imports
import pdfplumber
import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [2]:
# Step 2: Load PDF
# NOTE: absolute, machine-specific location; point this at your own resume PDF.
pdf_path = "/Users/Akhila/Desktop/Py_Images/Resume.pdf"

# Extract the text of every page first, then concatenate the non-empty pages,
# each followed by one space so words do not fuse across page boundaries.
with pdfplumber.open(pdf_path) as pdf:
    extracted_pages = [page.extract_text() for page in pdf.pages]
text = "".join(f"{page_text} " for page_text in extracted_pages if page_text)

In [3]:
# Step 3: Clean the text
# Removes emails, phone numbers, links, newlines, extra spaces, bullets, etc.
# Ordered scrubbing passes: every (pattern, replacement) pair runs on the
# output of the previous pass, so the sequence below is significant.
scrub_passes = (
    (r"\S+@\S+", ""),                            # emails
    (r"\+?\d[\d\s\-\(\)]{7,}", ""),              # phone numbers
    (r"(LinkedIn|Github|GitHub|www\.\S+)", ""),  # links
    (r"[\n\r]+", " "),                           # newlines
    (r"[\(\)\[\];•]+", ""),                      # special chars
    (r"\s+", " "),                               # extra spaces
)

clean_text = text
for pattern, replacement in scrub_passes:
    clean_text = re.sub(pattern, replacement, clean_text)

In [4]:
# Step 4: Chunk the text
def chunk_text(text, chunk_size=100):
    """Split ``text`` into consecutive, non-overlapping chunks of whole words.

    Args:
        text: String to split; words are delimited by any run of whitespace.
        chunk_size: Maximum number of words per chunk. Must be positive.

    Returns:
        A list of strings, each holding the space-joined words of one chunk.
        The final chunk may contain fewer than ``chunk_size`` words. Empty or
        whitespace-only ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A negative step would make range() empty and silently discard all of the
    # text; a zero step raises an error that never mentions chunk_size.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]

# Split the cleaned resume text into 120-word chunks (the function default is 100).
chunks = chunk_text(text=clean_text, chunk_size=120)

In [5]:
# Step 5: Load LLM
# The tokenizer and the seq2seq weights are loaded from the same checkpoint id.
checkpoint = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

In [6]:
# Step 6: Define Q&A function with better instructions
# Terms scrubbed from every generated answer. The lookarounds restrict matches
# to whole terms so ordinary words such as "proofreading" or "mastery" are left
# intact (a plain \b cannot be used because "Ph.D." ends in a non-word char).
_ANSWER_NOISE = re.compile(
    r"(?<!\w)(?:Akhila|ATMAKURU|LinkedIn|Github|Ph\.D\.|Master|University|Cranfield|Reading|UK|India)(?!\w)",
    flags=re.IGNORECASE,
)


def answer_question(question, chunks, max_length=128):
    """Ask ``question`` against every chunk and return the longest usable answer.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        question: Question text inserted into the prompt.
        chunks: Iterable of context strings; the model is queried once per chunk.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The longest cleaned answer produced for any chunk (the first one wins on
        ties), or ``"Not mentioned"`` when no chunk yields a usable answer.
    """
    answers = []
    for chunk in chunks:
        prompt = (
            f"Read the following resume context and answer the question in **single short sentence**. "
            f"Ignore names, emails, phone numbers, links, publications, awards, dates, and formatting.\n\n"
            f"Context: {chunk}\nQuestion: {question}\nAnswer:"
        )
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
        outputs = model.generate(**inputs, max_length=max_length)
        ans = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Drop leftover identifying terms, then repair what the removal leaves
        # behind: doubled spaces inside the answer and dangling separators at
        # either end (e.g. "Cranfield University, UK" would otherwise become ",").
        ans = _ANSWER_NOISE.sub("", ans)
        ans = " ".join(ans.split()).strip(" ,;:")

        # Treat "Not mentioned" as a non-answer, with or without a final period.
        if ans and ans.lower().rstrip(".") != "not mentioned":
            answers.append(ans)

    if not answers:
        return "Not mentioned"
    # Length is used as a proxy for completeness: the longest answer is returned.
    return max(answers, key=len)

In [7]:
# Step 7: Questions
# Questions to ask about the resume; each string is inserted into the model
# prompt exactly as written, so rewording an entry can change the answer.
questions = [
    "What is the main skill of the candidate?",
    "What is the candidate's highest education?",
    "What Skills does the candidate have?",
    "What are the publications?"
]

In [8]:
# Step 8: Get clean answers
# Run every question over all chunks and print each Q/A pair, separated by a blank line.
for question in questions:
    reply = answer_question(question, chunks)
    print(f"Q: {question}\nA: {reply}\n")

Q: What is the main skill of the candidate?
A: AI and Machine Learning Engineer

Q: What is the candidate's highest education?
A: Doctor of Philosophy  in Artificial Intelligence

Q: What Skills does the candidate have?
A: Expert in designing, developing, and deploying advanced deep learning models and architectures for large-scale, data-driven applications.

Q: What are the publications?
A: "Transfer Learning for the Cognitive Staging Prediction in Alzheimer’s Disease" ACAIN 2024, LNCS, Springer,2025 "Sensitivity Analysis for Feature Importance in Predicting Alzheimer’s Disease" ACAIN 2023, LNCS, Springer,2024 "Improved Filter-Based Feature Selection Using Correlation and Clustering Techniques" LOD 2023, LNCS, Springer,2024.

