In [1]:
import re
import pdfplumber
import docx

class ResumeParser:
    def __init__(self):
        self.phone_number_pattern = re.compile(r"(\+?\d{1,2}\s?)?(\(\d{1,4}\))?[.\-\s]?\d{1,5}[.\-\s]?\d{1,5}[.\-\s]?\d{1,9}")
        self.email_pattern = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
        self.skills_list = ["python", "c++", "machine learning", "data analysis", "communication","artificial Intelligence"]

    def extract_contact_details(self, text):
        phone_numbers = re.findall(self.phone_number_pattern, text)
        emails = re.findall(self.email_pattern, text)
        return {"phone_numbers": phone_numbers, "emails": emails}

    def extract_work_experience(self, text):
        work_experience_pattern = re.compile(r"work\s*experience", re.IGNORECASE)
        matches = re.finditer(work_experience_pattern, text)
        return [text[match.end():].strip()[:200] for match in matches]

    def extract_education(self, text):
        education_pattern = re.compile(r"education", re.IGNORECASE)
        matches = re.finditer(education_pattern, text)
        return [text[match.end():].strip()[:200] for match in matches]

    def extract_skills(self, text):
        skills_found = [skill for skill in self.skills_list if skill in text.lower()]
        return skills_found

    def parse_pdf(self, pdf_path):
        with pdfplumber.open(pdf_path) as pdf:
            text = ''
            for page in pdf.pages:
                text += page.extract_text()
        return text

    def parse_docx(self, docx_path):
        doc = docx.Document(docx_path)
        text = '\n'.join([paragraph.text for paragraph in doc.paragraphs])
        return text

    def parse_txt(self, txt_path):
        with open(txt_path, 'r', encoding='utf-8') as file:
            text = file.read()
        return text

In [2]:
resume_parser = ResumeParser()

In [3]:
resume_pdf_path = 'D:/PC Data/my doc/My Resume - Abdullah Maroof.pdf'

pdf_text = resume_parser.parse_pdf(resume_pdf_path)

contact_details_pdf = resume_parser.extract_contact_details(pdf_text)
work_experience_pdf = resume_parser.extract_work_experience(pdf_text)
education_pdf = resume_parser.extract_education(pdf_text)
skills_pdf = resume_parser.extract_skills(pdf_text)

print("PDF Contact Details:", contact_details_pdf)
print("PDF Work Experience:", work_experience_pdf)
print("PDF Education:", education_pdf)
print("PDF Skills:", skills_pdf)

PDF Contact Details: {'phone_numbers': [('', ''), ('+92', ''), ('+92', ''), ('', ''), ('90', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', '')], 'emails': ['abdullahmaroof597@gamil.com']}
PDF Work Experience: []
PDF Education: ['DECEMBER 2023\nBS Artificial Intelligence / Superior University Lahore\nI had completed my Graduation in BS Artificial Intelligence Degree from Superior University. My CGPA is\n3.69. Main courses were Ma']
PDF Skills: ['python', 'machine learning', 'data analysis', 'communication']


In [4]:
resume_docx_path = 'D:/PC Data/my doc/My Resume - Abdullah Maroof.docx'

docx_text = resume_parser.parse_docx(resume_docx_path)

contact_details_docx = resume_parser.extract_contact_details(docx_text)
work_experience_docx = resume_parser.extract_work_experience(docx_text)
education_docx = resume_parser.extract_education(docx_text)
skills_docx = resume_parser.extract_skills(docx_text)

print("DOCX Contact Details:", contact_details_docx)
print("DOCX Work Experience:", work_experience_docx)
print("DOCX Education:", education_docx)
print("DOCX Skills:", skills_docx)

DOCX Contact Details: {'phone_numbers': [('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', ''), ('', '')], 'emails': []}
DOCX Work Experience: []
DOCX Education: []
DOCX Skills: ['python', 'machine learning', 'data analysis', 'communication']


In [5]:
resume_txt_path = 'C:/Users/Abdullah Maroof/Documents/CV.txt'
txt_text = resume_parser.parse_txt(resume_txt_path)
contact_details_txt = resume_parser.extract_contact_details(txt_text)
work_experience_txt = resume_parser.extract_work_experience(txt_text)
education_txt = resume_parser.extract_education(txt_text)
skills_txt = resume_parser.extract_skills(txt_text)

print("TXT Contact Details:", contact_details_txt)
print("TXT Work Experience:", work_experience_txt)
print("TXT Education:", education_txt)
print("TXT Skills:", skills_txt)

TXT Contact Details: {'phone_numbers': [('', ''), ('', ''), ('', '(555)'), ('', ''), ('', ''), ('', ''), ('', '')], 'emails': ['john.doe@example.com']}
TXT Work Experience: [':\nSoftware Developer\nABC Tech Solutions, Cityville, ST\nJune 2020 - Present\n- Developed and maintained web applications using Python and Django.\n- Collaborated with cross-functional teams to design and']
TXT Education: [':\nBachelor of Science in Computer Science\nXYZ University, Cityville, ST\nGraduated: May 2020\n\nWork Experience:\nSoftware Developer\nABC Tech Solutions, Cityville, ST\nJune 2020 - Present\n- Developed and m']
TXT Skills: ['python', 'data analysis']
