<a href="https://colab.research.google.com/github/hg210704/Resume-Parser/blob/main/ResumeParser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install spacy PyPDF2
!python -m spacy download en_core_web_sm

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m81.1 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [2]:
import re
import PyPDF2
import spacy

In [3]:
# Load spaCy model
nlp = spacy.load("en_core_web_sm")

In [4]:
# FETCH AND READ RESUME
def fetch_resume_text(pdf_path):
    text = ""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            text += page.extract_text() + "\n"
    return text

In [5]:
def extract_name(text):
    doc = nlp(text[:500])
    for ent in doc.ents:
        if ent.label_ == "PERSON":
            return ent.text
    return "Not Found"

In [6]:
def extract_email(text):
    pattern = r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+"
    match = re.search(pattern, text)
    return match.group() if match else "Not Found"

In [7]:
def extract_phone(text):
    pattern = r"(\+?\d{1,3}[\s-]?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}"
    match = re.search(pattern, text)
    return match.group() if match else "Not Found"

In [8]:
SKILLS_DB = [
    "python", "java", "sql", "machine learning", "nlp",
    "deep learning", "django", "flask", "pandas", "numpy"
]

def extract_skills(text):
    text = text.lower()
    return list({skill.title() for skill in SKILLS_DB if skill in text})

In [9]:
def extract_education(text):
    education_keywords = [
        "bachelor", "master", "b.tech", "m.tech",
        "degree", "university", "college"
    ]
    education = []

    for line in text.split("\n"):
        if any(word in line.lower() for word in education_keywords):
            education.append(line.strip())

    return education

In [10]:
def extract_experience(text):
    experience_keywords = [
        "experience", "worked", "internship",
        "employment", "company", "project"
    ]
    experience = []

    for line in text.split("\n"):
        if any(word in line.lower() for word in experience_keywords):
            experience.append(line.strip())

    return experience

In [11]:
def parse_resume(pdf_path):
    text = fetch_resume_text(pdf_path)

    return {
        "Name": extract_name(text),
        "Email": extract_email(text),
        "Phone": extract_phone(text),
        "Skills": extract_skills(text),
        "Education": extract_education(text),
        "Experience": extract_experience(text)
    }


In [12]:
if __name__ == "__main__":
    resume_path = "/content/HimanshuG_Resume.pdf"
    parsed_data = parse_resume(resume_path)

    for key, value in parsed_data.items():
        print(f"{key}:")
        print(value)
        print("-" * 40)

Name:
Himanshu  Gotmukulwar
----------------------------------------
Email:
gotmukulwarhimanshu@gmail.com
----------------------------------------
Phone:
+91 8080678722
----------------------------------------
Skills:
['Machine Learning', 'Java', 'Sql', 'Python']
----------------------------------------
Education:
['Yeshwantrao  Chavan  College  of Engineering,  Nagpur  CGPA:  7.67/10', 'B.Tech.  Computer  Science  Engineering  (AIML)                                                                            Nov 2022  – Present', 'Shri Shivaji  Science  College,  Nagpur  Percentage:  77.33%']
----------------------------------------
Experience:
['Database : Experience  in MySQL', 'Projects', 'The project automate ingredient detection and analyzes compliance with food regulations.']
----------------------------------------
