**Install Libraries**

In [1]:
# %pip targets the running kernel's environment; the three packages the recorded run downloaded are pinned to those versions
%pip install openai pymupdf==1.25.3 python-docx==1.1.2 pandas numpy scikit-learn plotly tenacity requests reportlab==4.3.1

Collecting pymupdf
  Downloading pymupdf-1.25.3-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting reportlab
  Downloading reportlab-4.3.1-py3-none-any.whl.metadata (1.7 kB)
Downloading pymupdf-1.25.3-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m58.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading reportlab-4.3.1-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m75.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: reportlab, python-docx, pymupdf
Successfully installed pymupdf-1.25.3 python-docx-1.1.2 rep

**Upload the ZIP File with Resumes**

In [2]:
from google.colab import files

# Ask Colab for a file upload and keep whatever files.upload() returns in `uploaded`.
# NOTE(review): the extraction cell hard-codes "ACCOUNTANT.zip" rather than reading
# `uploaded`, so the archive must be uploaded under exactly that name.
uploaded = files.upload()


Saving ACCOUNTANT.zip to ACCOUNTANT.zip


**Extract ZIP Contents**

In [3]:
import zipfile
import os

zip_filename = "ACCOUNTANT.zip"
extract_path = "ACCOUNTANT"  # defined for reference only; it is not passed to extractall() below

# Unpack every member of the archive; with no target path given, files land
# relative to the current working directory.
with zipfile.ZipFile(zip_filename, mode='r') as archive:
    archive.extractall(path=None)


**List All PDF Resumes in the Folder**

In [4]:
import glob

resume_folder = "ACCOUNTANT"

# Collect every path directly inside the folder whose name matches *.pdf
pdf_pattern = os.path.join(resume_folder, "*.pdf")
resume_files = glob.glob(pdf_pattern)

print(f"Found {len(resume_files)} resumes:", resume_files)

Found 5 resumes: ['ACCOUNTANT/98559931.pdf', 'ACCOUNTANT/80053367.pdf', 'ACCOUNTANT/87635012.pdf', 'ACCOUNTANT/78403342.pdf', 'ACCOUNTANT/82649935.pdf']


**Extract Text from PDF or DOCX Files**

In [5]:
import fitz
import docx
import os
import tempfile
import re

def extract_text_from_pdf(file_path):
    """Return the plain text of every page in a PDF as one trimmed string.

    Each page's text is followed by a newline; leading and trailing
    whitespace of the combined result is stripped.
    """
    with fitz.open(file_path) as pdf:
        page_chunks = [page.get_text("text") + "\n" for page in pdf]
    return "".join(page_chunks).strip()

def extract_text_from_docx(file_path):
    """Return the text of a DOCX file's paragraphs, newline-separated and trimmed."""
    paragraphs = docx.Document(file_path).paragraphs
    lines = []
    for paragraph in paragraphs:
        lines.append(paragraph.text)
    joined = "\n".join(lines)
    return joined.strip()

def extract_text(file_path):
    """Determine file type from the extension and extract text accordingly.

    The extension check is case-insensitive, so "CV.PDF" and "cv.Docx" are
    accepted as well as lowercase names.

    NOTE: a later cell in this notebook defines `extract_text` again; once
    that cell has run, its definition replaces this one.

    Args:
        file_path: Path to a .pdf or .docx file.

    Returns:
        The extracted text, as returned by the matching extractor.

    Raises:
        ValueError: If the path ends in neither ".pdf" nor ".docx".
    """
    # Compare against a lowercased copy; the original path is what gets opened.
    lowered = str(file_path).lower()
    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(file_path)
    if lowered.endswith(".docx"):
        return extract_text_from_docx(file_path)
    raise ValueError("Unsupported file type. Upload a PDF or DOCX file.")

def remove_gender_bias(text):
    """Replace gender-coded words with neutral equivalents.

    Matching is case-insensitive and restricted to whole words. The
    replacement follows the capitalisation of the matched text: "She"
    becomes "They" and "CHAIRMAN" becomes "CHAIRPERSON", so sentence
    starts and headings keep their casing.

    Args:
        text: The text to neutralise.

    Returns:
        A copy of `text` with every listed term replaced.
    """
    gendered_words = {
        "he": "they", "she": "they",
        "him": "them", "her": "them",
        # "his" nearly always precedes a noun ("his team"), where "their" is
        # the grammatical replacement; "theirs" produced "theirs team".
        "his": "their", "hers": "theirs",
        "manpower": "workforce",
        "chairman": "chairperson",
        "salesman": "salesperson",
        "policeman": "police officer",
        "fireman": "firefighter",
        "maternity leave": "parental leave"
    }
    # NOTE: "her" is ambiguous ("her team" vs "told her") and is always
    # mapped to "them"; resolving it would need part-of-speech information.

    # One alternation, longest term first, so the text is scanned once and a
    # multi-word phrase is preferred over any shorter overlapping term.
    ordered_terms = sorted(gendered_words, key=len, reverse=True)
    pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(term) for term in ordered_terms) + r")\b",
        flags=re.IGNORECASE,
    )

    def _neutralise(match):
        found = match.group(0)
        neutral = gendered_words[found.lower()]
        if len(found) > 1 and found.isupper():
            return neutral.upper()
        if found[0].isupper():
            return neutral[0].upper() + neutral[1:]
        return neutral

    return pattern.sub(_neutralise, text)


**Use OpenRouter.ai to Analyze Resumes**

In [9]:
import requests
import json

# Prefer the OPENROUTER_API_KEY environment variable so the real key never has to be saved in the notebook
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "Your key here")  # fallback placeholder: set the env var or replace with your key

def _parse_json_object(content):
    """Return the JSON object contained in a model reply.

    The reply is tried as-is first. If that does not yield a dict (markdown
    fences, leading or trailing chatter), the span between the first "{" and
    the last "}" is parsed instead. Raises json.JSONDecodeError when no
    parseable object is found.
    """
    content = content.strip()
    try:
        parsed = json.loads(content)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    start, end = content.find("{"), content.rfind("}")
    if start == -1 or end < start:
        raise json.JSONDecodeError("No JSON object found", content, 0)
    return json.loads(content[start:end + 1])


def analyze_resume(text, job_desc, timeout=60):
    """Send resume text to OpenRouter AI for analysis.

    Args:
        text: Resume text to analyze.
        job_desc: Job description the resume is compared against.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The dict parsed from the model's JSON reply, or a dict with a single
        "error" key when the request fails, the status is not 200, or the
        reply cannot be parsed. Failures are reported in the return value
        rather than raised, so a loop over many resumes keeps going.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "your-app-name"  # Replace with a unique name for tracking
    }

    data = {
        "model": "mistralai/mistral-7b-instruct",  # Other options: "openai/gpt-3.5-turbo", "anthropic/claude-instant-v1"
        "messages": [
            {"role": "system", "content": "You are a resume analysis expert. Respond only with valid JSON."},
            {"role": "user", "content": f"""
            Please analyze the following resume text and provide insights in the following categories:
            - Skills
            - Experience
            - Education
            - Certifications
            - Domain Expertise

            Additionally, provide actionable feedback on how the candidate can improve their resume to better match the following job description.

            Job Description: {job_desc}

            Resume Text: {text}

            Provide the analysis in valid JSON format with these exact keys: skills, experience, education, certifications, domain, feedback.
            """}
        ],
        "temperature": 0.7
    }

    # Without a timeout a stalled connection would block the notebook indefinitely.
    try:
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
    except requests.RequestException as exc:
        return {"error": f"API call failed: {exc}"}

    if response.status_code != 200:
        return {"error": f"API call failed: {response.status_code} - {response.text}"}

    try:
        content = response.json()["choices"][0]["message"]["content"]
        return _parse_json_object(content)
    except ValueError:
        # Covers json.JSONDecodeError from both the HTTP body and the model reply.
        return {"error": "Invalid JSON response from OpenRouter"}
    except (KeyError, IndexError, TypeError, AttributeError):
        # A 200 response without the expected choices/message/content shape.
        return {"error": "Unexpected response structure from OpenRouter"}

def extract_text(file_path):
    """Determine file type, extract text, and neutralize gender bias.

    This redefinition replaces the `extract_text` defined in an earlier cell;
    the difference is that the extracted text is passed through
    `remove_gender_bias` before being returned. The extension check is
    case-insensitive, so "CV.PDF" and "cv.Docx" are accepted.

    Args:
        file_path: Path to a .pdf or .docx file.

    Returns:
        The extracted text after `remove_gender_bias` has been applied.

    Raises:
        ValueError: If the path ends in neither ".pdf" nor ".docx".
    """
    # Compare against a lowercased copy; the original path is what gets opened.
    lowered = str(file_path).lower()
    if lowered.endswith(".pdf"):
        text = extract_text_from_pdf(file_path)
    elif lowered.endswith(".docx"):
        text = extract_text_from_docx(file_path)
    else:
        raise ValueError("Unsupported file type. Upload a PDF or DOCX file.")

    return remove_gender_bias(text)


**Process and Analyze Resumes**

In [16]:
# The job description is identical for every resume, so it is defined once before the loop.
job_description = """Accounting Job Description:
We are seeking a highly skilled and detail-oriented accountant to join our team. The ideal candidate will have experience in financial reporting, bookkeeping, tax compliance, and financial analysis. Proficiency in accounting software such as QuickBooks or SAP is preferred. Strong analytical skills, attention to detail, and the ability to prepare financial statements are essential. CPA certification is a plus.
"""

# Keep every analysis, keyed by file path, so the results outlive the printed output.
analysis_results = {}

for file_path in resume_files:
    print(f"Processing: {file_path}")
    resume_text = extract_text(file_path)

    analysis = analyze_resume(resume_text, job_description)
    analysis_results[file_path] = analysis

    print("\n Resume Analysis Results ")
    for key, value in analysis.items():
        print(f"{key.capitalize()}: {value}")

Processing: ACCOUNTANT/98559931.pdf

 Resume Analysis Results 
Skills: ['QuickBooks', 'Pivot Tables', 'PeopleSoft', 'People Tools', 'Microsoft Office (Word, Excel, PowerPoint)', 'CSC Point IN system', 'Pro Series', 'Interpersonal', 'Analytical', 'Problem Analysis', 'Technical', 'Organizational', 'Communication', 'Data Entry', 'Journal Entries (SAP)', 'Adaptable', 'Trainable', 'Multitasking', 'Leadership', 'Attention to Detail']
Experience: [{'job_title': 'Payroll Accountant', 'employer': 'Company Name ï¼\u200b City , State', 'duration': 'Apr 2014 to Current', 'responsibilities': ['Processed and managed payroll for over 500+ union and non-union employees on bi-weekly basis', 'Worked closely with CFO and Controller handling month end close projects, audit, and other special projects.', 'Mentored and trained C-level, Managers, and Staff on ADP (Human Resources and Payroll process functions).', 'Maintained a system of confidentiality and internal control safe guarded policies with complian

**Save and Export Results**

In [17]:
import pandas as pd
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from google.colab import files

# Name of the report file; a relative path, so it is written to the current working directory
pdf_filename = "resume_analysis_results.pdf"

# Hand-entered analysis summaries, one dict per candidate
all_resumes = [
    {
        "name": "Candidate 1",
        "Skills": ['QuickBooks', 'Pivot Tables', 'PeopleSoft', 'People Tools', 'Microsoft Office (Word, Excel, PowerPoint)',
                   'CSC Point IN system', 'Pro Series', 'Interpersonal skills', 'Analytical skills', 'Problem analysis',
                   'Technical skills', 'Organizational skills', 'Communication skills', 'Data entry skills', 'Journal entries'],
        "Experience": "Payroll Accountant at Company Name (Apr 2014 - Current); Accounts Receivable at Company Name (Nov 2013 - Apr 2014)",
        "Education": "Bachelor of Science in Accounting, William Paterson University, GPA: 3.3",
        "Feedback": "Strong payroll management experience; Should emphasize financial reporting and tax compliance."
    },
    {
        "name": "Candidate 2",
        "Skills": ['Accounting', 'Balance', 'Budget', 'Business Analyst', 'Call Center', 'Cash Management', 'Critical Thinking'],
        "Experience": "Accountant at Company Name (Current - Sep 2014); Auditor at Company Name (Feb 2007 - Aug 2011)",
        "Education": "Accounting Certificate, Cecil County Community College (2012)",
        "Feedback": "Extensive experience in accounting and auditing; Adding CPA certification would be beneficial."
    },
    {
        "name": "Candidate 3",
        "Skills": ['Financial statement analysis', 'Lawson Financial knowledge', 'PeopleSoft knowledge', 'General ledger accounting',
                   'Microsoft Excel', 'Peachtree', 'SAP', 'Sarbanes-Oxley', 'SQL'],
        "Experience": "Principal Accountant at Company Name (Mar 2014 - Current); Financial Reporting Consultant at Company Name (Jul 2003 - Mar 2005)",
        "Education": "MBA in Finance, Rutgers University (2002)",
        "Feedback": "Strong financial reporting background; Should highlight QuickBooks or SAP experience."
    },
    {
        "name": "Candidate 4",
        "Skills": ['BLS certified', 'Typing speed: 72 WPM', 'Computer skills: MS Word/Excel/PowerPoint/Outlook', 'SAP',
                   'Sound ethical decision-making', 'Patient advocacy', 'Post-operative care', 'Medical-Surgical experience'],
        "Experience": "General Accountant at Company Name (Apr 2010 - Apr 2011); Medical Biller/Collector at Company Name (Dec 2009 - Apr 2010)",
        "Education": "Bachelor of Science in Nursing, California State University, Northridge (2014 - Current)",
        "Feedback": "Resume focuses on nursing, should emphasize financial reporting, bookkeeping, tax compliance."
    },
    {
        "name": "Candidate 5",
        "Skills": ['Financial Reporting', 'Bookkeeping', 'Tax Compliance', 'Financial Analysis', 'MS Excel', 'Word (60 WPM)'],
        "Experience": "Senior Accountant at Company Name (May 2015 - Current); Finance Associate at Company Name (Aug 2011 - May 2015)",
        "Education": "Bachelor of Arts in Economics, City College of New York, GPA: 3.2 Honors Cum Laude",
        "Feedback": "Well-structured resume, could emphasize proficiency in QuickBooks or SAP."
    }
]

# Document template and the stock paragraph styles used throughout
styles = getSampleStyleSheet()
doc = SimpleDocTemplate(pdf_filename, pagesize=letter)


def _heading(label):
    """Build the bold section heading paragraph for `label`."""
    return Paragraph(f"<b>{label}:</b>", styles['Heading2'])


# Flowables are accumulated in reading order, one group per candidate
elements = []
for resume in all_resumes:
    # Candidate title
    elements += [
        Paragraph(f" Resume Analysis: {resume['name']}", styles['Title']),
        Spacer(1, 10),
    ]

    # Skills, experience and education share one layout: heading, body, small gap
    single_paragraph_sections = (
        ("Skills", ", ".join(resume["Skills"])),
        ("Experience", resume["Experience"]),
        ("Education", resume["Education"]),
    )
    for label, body in single_paragraph_sections:
        elements += [_heading(label), Paragraph(body, styles['Normal']), Spacer(1, 10)]

    # Feedback is split on "; " and rendered as one dash-prefixed paragraph per item
    elements.append(_heading("Feedback"))
    elements.extend(
        Paragraph(f"- {fb}", styles['Normal']) for fb in resume["Feedback"].split("; ")
    )
    elements.append(Spacer(1, 20))

# Render the collected flowables into the PDF file
doc.build(elements)

# Hand the finished file to the browser for download
files.download(pdf_filename)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>