In [None]:
# ----- installing required libraries -----
!pip install -qU google-genai       # Google Gen AI SDK (Gemini)
!pip install -q pdfplumber python-docx nltk gradio fpdf



In [None]:
# importing libs, setting up a few helpers.
import os
import io
import re
import json
from collections import Counter

# resume handling
import pdfplumber
import docx

# file export
from docx import Document
from fpdf import FPDF

# NLP
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
from nltk.tokenize import word_tokenize

# Import the Google GenAI client. Two SDKs exist depending on the environment:
#   - google-genai          -> `from google import genai`       (newer, preferred)
#   - google-generativeai   -> `import google.generativeai`     (legacy fallback)
# GENAI_CLIENT_TYPE records which one was loaded so later cells can pick the matching call style.
try:
    from google import genai
    GENAI_CLIENT_TYPE = "google_genai"
except ImportError:
    # Only a missing package should trigger the fallback; any other import-time
    # failure (e.g. a broken install) is allowed to surface instead of being masked.
    try:
        import google.generativeai as genai
        GENAI_CLIENT_TYPE = "google_generativeai"
    except ImportError as exc:
        # ImportError is still an Exception, so existing `except Exception` handlers keep working;
        # chaining keeps the original cause visible in the traceback.
        raise ImportError("GenAI SDK not found. Re-run the install cell and restart runtime if needed.") from exc

print("GenAI client type:", GENAI_CLIENT_TYPE)


GenAI client type: google_genai


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [None]:
# Gemini API key + client setup.
# Either paste the key into GEMINI_API_KEY below, or leave it blank and provide it through
# the GEMINI_API_KEY / GOOGLE_API_KEY environment variable.
# Do not share or commit the notebook with a real key pasted in.

GEMINI_API_KEY = ""  # <-- PASTING MY GEMINI PRO API KEY HERE

# A blank value above falls back to the environment: GEMINI_API_KEY first, then GOOGLE_API_KEY.
GEMINI_API_KEY = (
    GEMINI_API_KEY
    or os.environ.get("GEMINI_API_KEY")
    or os.environ.get("GOOGLE_API_KEY")
)

if not GEMINI_API_KEY:
    raise Exception("No Gemini API key found. Please paste it into GEMINI_API_KEY or set GEMINI_API_KEY environment variable.")

# Build `client` for whichever SDK the imports cell loaded.
if GENAI_CLIENT_TYPE != "google_genai":
    # Legacy SDK: configuration is module-wide, so the module itself is exposed as `client`
    # to keep the name used by later cells.
    genai.configure(api_key=GEMINI_API_KEY)
    client = genai
else:
    # Newer SDK: an explicit Client object carries the key.
    client = genai.Client(api_key=GEMINI_API_KEY)

print("Gemini API key set. Ready to call the model.")


Gemini API key set. Ready to call the model.


In [None]:
# Run this cell to upload the resume (PDF or DOCX) and to set the job description.

# CHANGING THE JOB DESCRIPTION BELOW IS MANDATORY

from google.colab import files

print("Please upload your resume (PDF or DOCX).")
uploaded = files.upload()  # choose file from dialog

# Nothing uploaded (e.g. the dialog was dismissed): fail with a clear message
# instead of an IndexError from indexing an empty key list.
if not uploaded:
    raise Exception("No file was uploaded. Re-run this cell and choose a PDF or DOCX resume.")

# Only the first uploaded file is used; any additional files are ignored.
resume_filename = next(iter(uploaded))
print("Uploaded:", resume_filename)

# Paste the job description into the variable below (replace its contents).
job_description = """
ML/AI Developer.
"""
print("Paste your Job Description into the 'job_description' variable in this cell and re-run if you want to edit it.")


Please upload your resume (PDF or DOCX).


Saving prabhakar rayal Resume.pdf to prabhakar rayal Resume (2).pdf
Uploaded: prabhakar rayal Resume (2).pdf
Paste your Job Description into the 'job_description' variable in this cell and re-run if you want to edit it.


In [None]:
# these functions will read pdf/docx and return clean text.
def extract_text_from_pdf(path):
    """Return the text of every page of the PDF at `path`, joined with newlines.

    A page for which `extract_text()` returns nothing contributes an empty string,
    so the page count is preserved in the joined result.
    """
    with pdfplumber.open(path) as pdf:
        page_texts = [(page.extract_text() or "") for page in pdf.pages]
    return "\n".join(page_texts)

def extract_text_from_docx(path):
    """Return the text of each paragraph in the DOCX at `path`, one paragraph per line.

    Only `Document.paragraphs` is read.
    """
    return "\n".join(para.text for para in docx.Document(path).paragraphs)

def load_resume_text(filename):
    """Extract plain text from a resume file, choosing the reader by file extension.

    Args:
        filename: Path to the resume. The extension check is case-insensitive.

    Returns:
        The extracted text as a single string.

    Raises:
        Exception: If the extension is not .pdf/.docx/.doc, or if a `.doc` file
            cannot be read as a DOCX package.
    """
    lower = filename.lower()
    if lower.endswith(".pdf"):
        return extract_text_from_pdf(filename)
    if lower.endswith(".docx"):
        return extract_text_from_docx(filename)
    if lower.endswith(".doc"):
        # python-docx reads the .docx (OOXML) package format. A file named .doc only works
        # when it is really a .docx under another name, so the attempt is still made, but a
        # failure is reported in terms the user can act on rather than as a library error.
        try:
            return extract_text_from_docx(filename)
        except Exception as exc:
            raise Exception(
                "Could not read this .doc file. Legacy Word .doc files are not supported; "
                "save the resume as DOCX or PDF and upload it again."
            ) from exc
    raise Exception("Unsupported resume format. Use PDF or DOCX.")

# Load the resume text and show a short preview.
resume_text = load_resume_text(resume_filename)

# An empty result means no text could be extracted; every later step would then run on
# an empty resume, so say so here.
if not resume_text.strip():
    print("WARNING: no text could be extracted from the resume file.")

print("----- Your Resume Preview (first 600 chars) -----")
# Append the ellipsis only when the preview is actually truncated.
print(resume_text[:600] + ("..." if len(resume_text) > 600 else ""))


----- Your Resume Preview (first 600 chars) -----
Prabhakar Rayal
Email-id : prabhakarrayalarcy@gmail.com
Mobile No.: 9027677731,
www.linkedin.com/in/prabhakar-rayal-663968259
EDUCATION
• B.Tech in Computer Science | Graphic Era Hill University, Dehradun
2021 – 2025 | CGPA: 7.59/10
• 12th Grade (I.S.E) | Modern School, Rishikesh
2020 – 2021 | Percentage: 69.5%
• 10th Grade (I.C.S.E) | Modern School, Rishikesh
2018 – 2019 | Percentage: 60.4%
PROJECTS
• BGMI Tournament Website (HTML, CSS, JavaScript)
Feb 2022 – Apr 2022
Developed a web platform for BGMI players to register and participate in custom tournaments (frontend).
• Movie Recommender Sy...


In [None]:
# Download the NLTK resources used by the cells below.
# NOTE(review): `import nltk` and the 'punkt' / 'averaged_perceptron_tagger' downloads
# repeat what the imports cell at the top of the notebook already does.
import nltk

nltk.download('punkt')
# presumably the tokenizer data that word_tokenize needs on recent NLTK releases — TODO confirm
nltk.download('punkt_tab')
# NOTE(review): the tagger, NE chunker and word list are not referenced by any code visible
# in this notebook (only word_tokenize is called) — confirm whether they are still needed.
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [None]:
# extracting likely skills/keywords from JD using simple guideline.
def extract_skills_from_text(text, top_k=40):
    """Extract likely skill keywords from free text such as a job description.

    Two sources are combined, in this order, with duplicates removed:
      1. Known skill phrases (e.g. "machine learning", "c++") found in the text.
      2. The `top_k` most frequent alphanumeric tokens that are not in a small stop list.

    Known phrases are matched as whole terms rather than raw substrings, so "java" is not
    reported for "javascript", "sql" is not reported for "postgresql", and "git" is not
    reported for "digital". A trailing plural "s" ("REST APIs") or a version digit
    ("python3", "html5") still counts as a match.

    Args:
        text: The text to scan.
        top_k: Maximum number of single-word tokens to take by frequency.

    Returns:
        A list of lowercase skill strings, known phrases first.
    """
    known_phrases = ["machine learning","deep learning","data science","computer vision","natural language processing","nlp","react js","node js","git","docker","kubernetes","flask","django","fastapi","tensorflow","pytorch","rest api","sql","nosql","mongodb","postgresql","aws","gcp","azure","javascript","html","css","c++","c#","java","python"]
    # Minimal stop list: common function words plus JD filler that is not a skill.
    stop = {"the","and","with","for","of","to","in","a","on","is","experience","years","year","should","strong"}

    # Single-word candidates: lowercase, longer than one character, purely alphanumeric.
    tokens = [w.lower() for w in word_tokenize(text) if len(w) > 1]
    tokens = [t for t in tokens if t.isalnum() and t not in stop]

    lowered = text.lower()
    multi_phrases = []
    for phrase in known_phrases:
        # Lookbehind: the phrase must not continue a longer word ("nosql" must not yield "sql").
        # Lookahead: no letter, '+' or '#' may follow ("javascript" must not yield "java"),
        # while an optional plural "s" and trailing digits are tolerated.
        pattern = r"(?<![a-z0-9+#])" + re.escape(phrase) + r"s?(?![a-z+#])"
        if re.search(pattern, lowered):
            multi_phrases.append(phrase)

    most = [t for t, _count in Counter(tokens).most_common(top_k)]
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(multi_phrases + most))

# Keywords extracted from the job description; the resume is compared against this list in the next step.
jd_skills = extract_skills_from_text(job_description)
print("Extracted JD skills (top):", jd_skills[:30])


Extracted JD skills (top): ['developer']


In [None]:
# finding which JD skills are missing from the resume
def find_missing_skills(resume_text, skills_list):
    """Split `skills_list` into skills found in the resume and skills that are absent.

    Matching is case-insensitive on both sides and works on whole terms rather than raw
    substrings, so a resume that mentions only "JavaScript" does not count as having
    "java", and "PostgreSQL" does not count as "sql". A trailing plural "s" or version
    digit on the resume side ("APIs", "python3") still matches.

    Args:
        resume_text: Full resume text.
        skills_list: Skill strings to look for.

    Returns:
        A `(present, missing)` tuple of lists; each keeps the order and original
        spelling of `skills_list`.
    """
    text = resume_text.lower()
    present, missing = [], []
    for skill in skills_list:
        # Same term-boundary rule as the JD phrase matcher: nothing word-like before,
        # no letter / '+' / '#' after (optional plural "s" allowed).
        pattern = r"(?<![a-z0-9+#])" + re.escape(skill.lower()) + r"s?(?![a-z+#])"
        (present if re.search(pattern, text) else missing).append(skill)
    return present, missing

# Compare the resume against the JD keywords. `missing_skills` is reused later as the
# fallback list when the model's analysis cannot be parsed as JSON.
present_skills, missing_skills = find_missing_skills(resume_text, jd_skills)
print("Present skills (sample):", present_skills[:15])
print("Missing skills (sample):", missing_skills[:15])


Present skills (sample): []
Missing skills (sample): ['developer']


In [None]:
# Prompt definitions - the heart of the tool.
# The tone can be tweaked in the templates below (e.g. professional, friendly, concise).

# NOTE(review): this f-string is rendered immediately from resume_text / job_description,
# but `analysis_prompt` is reassigned from PROMPT_ANALYZE in the analysis cell before any
# model call, so the technical / non-technical classification prompt below is never sent
# as written.
analysis_prompt = f"""
You are an ATS resume optimisation assistant.
First, detect whether the resume is technical or non-technical.
- If technical, focus on adding role-specific hard skills, tools, and frameworks.
- If non-technical, focus on soft skills, relevant industry experience, and domain-specific keywords.
Do not insert irrelevant terms.

Resume text:
{resume_text}

Job description:
{job_description}

Step 1: Classify resume as technical or non-technical.
Step 2: Identify missing skills and keywords relevant to this domain.
Step 3: Suggest modifications for ATS optimisation.
"""

# Analysis template. Placeholders: {resume}, {jd}. It is filled with str.format(), so any
# literal braces added to the template text must be doubled ({{ }}).
PROMPT_ANALYZE = """
You are a professional resume analyst. Given the candidate's resume text and a job description,
1) list missing or weakly-present skills from the JD that should be added,
2) suggest concise bullet points to add in Experience or Projects to reflect those skills (write 2-3 sample bullets each),
3) give an estimated ATS keyword match percentage (simple estimate).

Resume:
{resume}

Job Description:
{jd}

Return the result as JSON with fields: missing_skills, suggestions (list), ats_estimate (0-100), explanation (short).
"""

# Rewrite template. Placeholders: {resume}, {jd}, {suggestions}; also filled with str.format().
PROMPT_REWRITE = """
You are a professional resume writer. Rewrite the candidate's resume to match the given Job Description.
- Keep the candidate's facts (roles, company names, dates, project titles) intact.
- Improve wording, make bullets achievement-focused, add missing skills where appropriate (based on suggestions).
- Keep output in clean resume-format text with sections: Summary, Skills, Experience (with bullets), Projects, Education.
- Use a professional, concise tone suitable for tech job applications.

Resume (original):
{resume}

Job Description:
{jd}

Missing skills suggestions (optional, JSON list):
{suggestions}

Produce only the rewritten resume text (no extra commentary).
"""

# {resume}, {jd} and {suggestions} are filled in by the analysis and rewrite cells.


In [None]:
# Send the analysis prompt to Gemini and turn the reply into missing skills + suggestions.
# MODEL_NAME can be changed to another Gemini model (e.g. gemini-2.5-flash) if needed.
import re, json

MODEL_NAME = "gemini-2.5-pro"  # I have Gemini pro, so i am using the pro version.

# Resume and JD are each cut to 5000 characters to keep the prompt at a safe length.
analysis_prompt = PROMPT_ANALYZE.format(resume=resume_text[:5000], jd=job_description[:5000])

print("Sending analysis prompt to Gemini... (this may take a few seconds)")
if GENAI_CLIENT_TYPE == "google_genai":
    # Newer google-genai client style.
    response = client.models.generate_content(model=MODEL_NAME, contents=analysis_prompt)
else:
    # Legacy google-generativeai SDK: Gemini models are called through GenerativeModel.
    # generate_text() belongs to the older PaLM text API and does not serve Gemini models.
    response = client.GenerativeModel(MODEL_NAME).generate_content(analysis_prompt)

# The reply may carry no text at all; normalise to "" so the slicing and regex below
# do not fail on None.
raw_text = response.text or ""

print("Raw analysis output (preview):")
print(raw_text[:5000])

# Try to parse a JSON object out of the reply. The model often wraps it in a ```json fence
# or surrounds it with prose, so take everything from the first "{" to the last "}".
json_text = None
m = re.search(r"\{[\s\S]*\}", raw_text)
if m:
    try:
        parsed = json.loads(m.group(0))
    except json.JSONDecodeError:
        parsed = None
    # Only a JSON object supports the .get() lookups below.
    if isinstance(parsed, dict):
        json_text = parsed
if json_text is None:
    print("Couldn't parse JSON automatically. We'll fall back to heuristics.")

if json_text:
    suggestions = json_text.get("suggestions", [])
    missing_skills_from_ai = json_text.get("missing_skills", [])
    ats_estimate = json_text.get("ats_estimate", "N/A")
else:
    # Fallback: reuse the keyword-based missing_skills from the earlier cell and
    # create one generic suggestion per skill (at most 8).
    missing_skills_from_ai = missing_skills
    suggestions = [
        f"- Add experience / project bullet showing experience with {s} (quantify impact)."
        for s in missing_skills_from_ai[:8]
    ]
    ats_estimate = None

print("Missing skills (final):", missing_skills_from_ai[:20])
print("Sample suggestions:", suggestions[:6])


Sending analysis prompt to Gemini... (this may take a few seconds)
Raw analysis output (preview):
```json
{
  "missing_skills": [
    "Deep Learning Frameworks (PyTorch, TensorFlow)",
    "Model Evaluation Metrics (e.g., Accuracy, Precision, Recall, PSNR, MSE)",
    "Model Deployment & API Development (Using frameworks like Flask)",
    "Advanced ML/DL Architectures (e.g., CNNs for image tasks)",
    "Version Control (Git, GitHub)"
  ],
  "suggestions": [
    {
      "skill_to_add": "Deep Learning Frameworks & Advanced Architectures",
      "bullet_points": [
        "Major Project: Implemented and trained Convolutional Neural Network (CNN) based autoencoders using TensorFlow/PyTorch to effectively denoise medical image data.",
        "Major Project: Engineered and compared multiple deep learning models, leveraging autoencoder architectures to significantly improve image clarity and data quality for diagnostic purposes."
      ]
    },
    {
      "skill_to_add": "Model Evaluation & Q

In [None]:
# Ask Gemini to rewrite the resume using the suggestions from the analysis step.
# Resume is cut to 6000 characters and the JD to 4000 to keep the prompt bounded.
rewrite_prompt = PROMPT_REWRITE.format(resume=resume_text[:6000], jd=job_description[:4000], suggestions=json.dumps(suggestions))

print("Sending rewrite request to Gemini... (this will be the biggest call)")
if GENAI_CLIENT_TYPE == "google_genai":
    response = client.models.generate_content(model=MODEL_NAME, contents=rewrite_prompt)
else:
    # Legacy google-generativeai SDK: Gemini models are called through GenerativeModel.
    # generate_text() belongs to the older PaLM text API and does not serve Gemini models.
    response = client.GenerativeModel(MODEL_NAME).generate_content(rewrite_prompt)

# The reply may carry no text; stop here with a clear message rather than exporting
# empty DOCX/PDF files in the next cell.
rewritten = response.text or ""
if not rewritten.strip():
    raise Exception("Gemini returned an empty rewrite. Re-run this cell or try a different model.")

print("----- Rewritten resume preview -----")
print(rewritten[:6000])


Sending rewrite request to Gemini... (this will be the biggest call)
----- Rewritten resume preview -----
**Prabhakar Rayal**
prabhakarrayalarcy@gmail.com | (902) 767-7731 | LinkedIn: /in/prabhakar-rayal | GitHub: [Your GitHub URL]

---

### **Summary**

Driven and analytical Computer Science undergraduate specializing in Machine Learning and Artificial Intelligence. Proficient in Python, deep learning frameworks, and end-to-end model development, from data processing to API deployment. Seeking to leverage hands-on project experience in building and optimizing AI-powered solutions to contribute to an innovative ML/AI Developer role.

---

### **Skills**

*   **Programming Languages:** Python, Java, C++, SQL, JavaScript, HTML/CSS
*   **ML/AI Libraries & Frameworks:** Scikit-learn, Pandas, NumPy, TensorFlow, PyTorch, Tkinter
*   **Tools & Technologies:** Git, GitHub, Flask, VS Code, Google Colab, Jupyter Notebooks
*   **Cloud & Databases:** AWS (Basics), SQL
*   **Languages:** English (F

In [None]:
# Saveing the rewritten resume into a neat DOCX file for download.
# Adding cleaning so PDF export won't fail with special characters.

def text_to_docx(text, out_filename="optimized_resume.docx"):
    """Write resume text to a DOCX file with basic headings and bullet lists.

    The rewritten resume usually comes back from the model as light Markdown, so each
    line is normalised before it is added to the document:
      - horizontal rules (`---`, `***`, `___`) are skipped;
      - lines starting with `#` become level-2 headings, without the hashes;
      - lines starting with `-`, `•` or `* ` become 'List Bullet' paragraphs;
      - `**bold**` markers are removed from the text;
      - a short (< 80 chars) paragraph that stands alone and is all caps, or mentions a
        section keyword, is treated as a heading;
      - everything else becomes a normal paragraph.

    Args:
        text: Resume text, plain or Markdown-flavoured.
        out_filename: Path of the DOCX file to write.

    Returns:
        `out_filename`.
    """
    section_words = ("summary", "skills", "experience", "education", "projects")
    doc = Document()

    # Blank lines separate paragraphs; the heading heuristic applies only to
    # paragraphs that consist of a single line.
    for block in re.split(r"\n\s*\n", text):
        lines = [ln.strip() for ln in block.split("\n") if ln.strip()]
        standalone = len(lines) == 1
        for line in lines:
            if re.fullmatch(r"[-*_]{3,}", line):
                continue  # Markdown horizontal rule, not content

            # "*" counts as a bullet only when followed by whitespace, so "**bold**"
            # at the start of a line is not mistaken for a list item.
            bullet = re.match(r"(?:[-•][-•\s]*|\*\s+)", line)
            is_md_heading = line.startswith("#")

            body = line[bullet.end():] if bullet else line
            if is_md_heading:
                body = re.sub(r"^#{1,6}\s*", "", body)
            body = body.replace("**", "").strip()
            if not body:
                continue  # the line held only markup

            if bullet:
                doc.add_paragraph(body, style='List Bullet')
            elif is_md_heading or (
                standalone
                and len(body) < 80
                and (body.isupper() or any(w in body.lower() for w in section_words))
            ):
                doc.add_heading(body, level=2)
            else:
                doc.add_paragraph(body)

    doc.save(out_filename)
    return out_filename

# Save DOCX: write the model's rewritten resume text to optimized_resume.docx.
out_docx = text_to_docx(rewritten, out_filename="optimized_resume.docx")
print("Saved DOCX:", out_docx)

# Simple PDF export: clean up fancy characters so FPDF (latin-1) doesn't crash
import re

def clean_for_pdf(text):
    """Make `text` safe for a latin-1-only PDF writer.

    Common typographic characters are mapped to plain equivalents first (dashes, curly
    quotes, bullets, ellipsis). Any character that still cannot be encoded as latin-1 is
    then replaced with '?'.

    Args:
        text: Arbitrary Unicode text.

    Returns:
        A string in which every character is representable in latin-1.
    """
    replacements = {
        "\u2013": "-",    # en dash
        "\u2014": "-",    # em dash
        "\u2011": "-",    # non-breaking hyphen
        "\u2212": "-",    # minus sign
        "\u2018": "'",    # left single quote
        "\u2019": "'",    # right single quote / apostrophe
        "\u201c": '"',    # left double quote
        "\u201d": '"',    # right double quote
        "\u2022": "-",    # bullet; common in resumes, would otherwise turn into '?'
        "\u2026": "...",  # ellipsis
    }
    text = text.translate(str.maketrans(replacements))
    # Anything still outside latin-1 is substituted with '?' (not removed).
    return text.encode('latin-1', 'replace').decode('latin-1')

# This will convert DOCX text to PDF (basic formatting)
from fpdf import FPDF
def docx_to_pdf_simple(docx_text, out_pdf="optimized_resume.pdf"):
    """Write `docx_text` to `out_pdf` as a plain, single-font PDF and return the path.

    The text is passed through clean_for_pdf() first and then emitted line by line
    with multi_cell(). Despite the parameter name, the input is a text string,
    not a DOCX file.
    """
    printable = clean_for_pdf(docx_text)

    document = FPDF()
    document.set_auto_page_break(auto=True, margin=15)
    document.add_page()
    document.set_font("Arial", size=12)

    for row in printable.split("\n"):
        document.multi_cell(0, 7, txt=row)

    document.output(out_pdf)
    return out_pdf

# PDF save: the PDF is generated from the rewritten text itself, not from the DOCX file.
txt = rewritten
out_pdf = docx_to_pdf_simple(txt, out_pdf="optimized_resume.pdf")
print("Saved PDF:", out_pdf)

# Trigger browser downloads of both files (Colab only).
# `colab_files` is the same google.colab.files module used for the upload, under a second name;
# the report cell further down also relies on this alias.
from google.colab import files as colab_files
print("You can download the files now:")
colab_files.download("optimized_resume.docx")
colab_files.download("optimized_resume.pdf")


Saved DOCX: optimized_resume.docx
Saved PDF: optimized_resume.pdf
You can download the files now:


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Create a small JSON report summarising the changes for HR (they asked for an explanation).
from google.colab import files as colab_files  # local import so this cell does not depend on the export cell

# One name for the write, the message and the download, so the three cannot drift apart.
REPORT_FILENAME = "resume_change_report.json"

report = {
    "original_resume_file": resume_filename,
    "optimized_resume_file": "optimized_resume.docx",
    "missing_skills_added": missing_skills_from_ai,
    "suggestions_applied": suggestions,
    "ats_estimate_before": None,   # you could add an estimate before by simple matching
    "ats_estimate_after": ats_estimate
}

with open(REPORT_FILENAME, "w", encoding="utf-8") as f:
    json.dump(report, f, indent=2)

print("Saved report:", REPORT_FILENAME)
colab_files.download(REPORT_FILENAME)




Saved report: resume_change_report.text


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# 📄 ATS Resume Optimizer

An AI-powered tool that rewrites resumes to maximize Applicant Tracking System (ATS) match scores by aligning them with a given job description.
This project uses Google Gemini API for text analysis and rewriting, extracting missing skills, and producing an optimized, ATS-friendly version of the resume.

# 🚀 Features

* Automatic Skill Extraction

Detects missing keywords, skills, and phrases from the job description.

* Technical & Non-Technical Resume Handling

Classifies resumes as technical or non-technical and optimizes accordingly.

* ATS Match Estimation

Provides an estimated ATS keyword match score before and after optimization.

* Professional Rewriting

Improves grammar, tone, and structure while keeping facts intact.

* DOCX & PDF Export

Outputs clean, recruiter-ready resumes in both .docx and .pdf formats.

* Change Report

Generates a JSON report summarizing modifications for transparency.

# 🛠️ Tech Stack

    Language: Python 3.

    AI API: Google Gemini API. (THE API KEY I USED IN THIS PROJECT WILL BE AVAILABLE FOR 7 DAYS AFTER SUBMISSION)

    Libraries: google-genai (Gemini API client), pdfplumber (PDF text extraction),

    nltk (skill extraction), python-docx (DOCX creation),

    fpdf (PDF generation), re (text cleaning).

# 📂 Project Structure

resume-optimizer/

├── Resume_Optimizer.ipynb       # Main notebook

├── requirements.txt             # Dependencies

├── sample_resume.docx           # Example resume

├── job_description.txt          # Example job description

├── optimized_resume.docx        # Output DOCX

├── optimized_resume.pdf         # Output PDF

├── resume_change_report.json    # Summary of changes

└── README.md # Project documentation

# ⚙️ Setup Instructions

1️⃣ Clone the Repository

git clone https://github.com/Prabhakarrayal

    cd resume-optimizer

2️⃣ Install Dependencies


    pip install -r requirements.txt

3️⃣ Set up Google Gemini API Key

Go to Google AI Studio

Generate an API key
Save it in a .env file:

    GEMINI_API_KEY=your_api_key_here

4️⃣ Run the Notebook
In Google Colab or VS Code Jupyter Extension:


    jupyter notebook Resume_Optimizer.ipynb

# 🖥️ Usage

Upload Resume (PDF or DOCX format)

Paste Job Description in the input cell

Run All Cells

Download Optimized Resume & Report

# 📊 Example Output
ATS Match Before: 62%

ATS Match After: 88%

Added Skills: ["Agile Methodology", "SQL", "Stakeholder Management"]

# 📎 Resources
Google Gemini API Docs

Python-docx Documentation

FPDF Documentation

NLTK Documentation

