In [9]:
import spacy
import PyPDF2
import re
import pandas as pd
import yake

In [10]:
# Provide a list of PDF files to parse and specify the output CSV file
pdf_files = ['resume.pdf', 'varenyav_resume.pdf', 'rachanaAlva.pdf', 'RESUME_WincelGlany.pdf', 'RamyaCV.pdf', 'cleona resume 1.pdf']
output_csv = 'resumes.csv'

In [11]:
nlp = spacy.load('en_core_web_sm')

In [12]:
def extract_resume_data(pdf_file):

    with open(pdf_file, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        resume_text = ''
        for page in reader.pages:
            resume_text += page.extract_text()
        
    doc = nlp(resume_text)
    entities = [ent.text for ent in doc.ents]
    
    # Extracting main keywords 
    kw_extractor = yake.KeywordExtractor(top=10, stopwords=None)
    keywords = kw_extractor.extract_keywords(resume_text)
    keywords = [kw for kw, v in keywords]

    # Extract email addresses using regular expression
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
    emails = re.findall(email_pattern, resume_text)

    # Extract phone numbers using regular expression
    phone_pattern = r'\+?\d[\d -]{8,12}\d'
    phones = re.findall(phone_pattern, resume_text)

    return resume_text, emails, phones, entities, keywords

In [13]:
def parse_resumes_to_dataframe(pdf_files):
    data = []

    for pdf_file in pdf_files:
        resume_text, emails, phones, Entities, Keywords = extract_resume_data(pdf_file)
        filename = pdf_file.split('/')[-1]

        data.append({'Filename': filename, 'Resume Text': resume_text, 'Emails': ', '.join(emails), 'Phone Numbers': ', '.join(phones), 'Entities':'\n- '.join(Entities), 'Keywords':'\n- '.join(Keywords)})

    df = pd.DataFrame(data)

    return df

In [14]:
df = parse_resumes_to_dataframe(pdf_files)
df

Unnamed: 0,Filename,Resume Text,Emails,Phone Numbers,Entities,Keywords
0,resume.pdf,\n \n \n \n \n \n \n \n \n \n \n \n \n \n \nK...,"aparnak743@gmail.com, hemasree71@gmail.com","9188049406, +91 9495870533",Board/University College/Institution Year\n-...,CAREER OBJECTIVE\n- APARNA Email\n- Java Scrip...
1,varenyav_resume.pdf,1 | P a g e \nVARENYA VINAY \n \n \nEmail: ...,"varenyav2000@gmail.com, hemalatha@staloysius.a...","+91 9739749703, +91 9495870533",1\n- linkedin.com/in/varenya\n- Big Data Analy...,Education Examination Board\n- QUALIFICATION S...
2,rachanaAlva.pdf,RACHANAALVA\nEmail:2117040rachana@staloysius.a...,2117040rachana@staloysius.ac,"+917034609485, +919495870533",inMobile:+917034609485\n- InstitutionYear\n- P...,Autonomous\n- Mangalore\n- Android\n- Duration...
3,RESUME_WincelGlany.pdf,\n \n WINCEL GLANY PAIS ...,"wincelpais@gmail.com, hemalatha@staloysius.ac....","7349418607, 09-07-2000, +91 9495870533, +91 82...",7349418607\n- Board/Unive \n\n- Big Data \nA...,Aloysius College\n- Pre University College\n- ...
4,RamyaCV.pdf,2 0 2 3\n2 0 2 1\n2 0 1 8\n2 0 1 6\nR a m y a ...,,"6 7 1 1 2 4, 9 6 3 3 2 8 8, 9 7 4 1 9 6 5, 9 7...",3\n- 1\n- 8\n- 6\n- H\n- s h e r\n- l\n- l\n- ...,
5,cleona resume 1.pdf,\n \nCLEONA J MONTEIRO \nEmail: cleonamonte...,"cleonamonteiro2000@gmail.com, ruban@staloysius...","9108126684, 2016 -2017, 16-04-2000, +91 974196...",Software\n- INSTITUTION UNIVERSITY \nBOARD ...,IBM Developer Skills\n- Developer Skills Netwo...


Unnamed: 0,Filename,Resume Text,Emails,Phone Numbers,Entities,Keywords
0,resume.pdf,\n \n \n \n \n \n \n \n \n \n \n \n \n \n \nK...,"aparnak743@gmail.com, hemasree71@gmail.com","9188049406, +91 9495870533",Board/University College/Institution Year\n-...,CAREER OBJECTIVE\n- APARNA Email\n- Java Scrip...
1,varenyav_resume.pdf,1 | P a g e \nVARENYA VINAY \n \n \nEmail: ...,"varenyav2000@gmail.com, hemalatha@staloysius.a...","+91 9739749703, +91 9495870533",1\n- linkedin.com/in/varenya\n- Big Data Analy...,Education Examination Board\n- QUALIFICATION S...
2,rachanaAlva.pdf,RACHANAALVA\nEmail:2117040rachana@staloysius.a...,2117040rachana@staloysius.ac,"+917034609485, +919495870533",inMobile:+917034609485\n- InstitutionYear\n- P...,Autonomous\n- Mangalore\n- Android\n- Duration...
3,RESUME_WincelGlany.pdf,\n \n WINCEL GLANY PAIS ...,"wincelpais@gmail.com, hemalatha@staloysius.ac....","7349418607, 09-07-2000, +91 9495870533, +91 82...",7349418607\n- Board/Unive \n\n- Big Data \nA...,Aloysius College\n- Pre University College\n- ...
4,RamyaCV.pdf,2 0 2 3\n2 0 2 1\n2 0 1 8\n2 0 1 6\nR a m y a ...,,"6 7 1 1 2 4, 9 6 3 3 2 8 8, 9 7 4 1 9 6 5, 9 7...",3\n- 1\n- 8\n- 6\n- H\n- s h e r\n- l\n- l\n- ...,
5,cleona resume 1.pdf,\n \nCLEONA J MONTEIRO \nEmail: cleonamonte...,"cleonamonteiro2000@gmail.com, ruban@staloysius...","9108126684, 2016 -2017, 16-04-2000, +91 974196...",Software\n- INSTITUTION UNIVERSITY \nBOARD ...,IBM Developer Skills\n- Developer Skills Netwo...


Unnamed: 0,Filename,Resume Text,Emails,Phone Numbers,Entities,Keywords
0,resume.pdf,\n \n \n \n \n \n \n \n \n \n \n \n \n \n \nK...,"aparnak743@gmail.com, hemasree71@gmail.com","9188049406, +91 9495870533",Board/University College/Institution Year\n-...,CAREER OBJECTIVE\n- APARNA Email\n- Java Scrip...
1,varenyav_resume.pdf,1 | P a g e \nVARENYA VINAY \n \n \nEmail: ...,"varenyav2000@gmail.com, hemalatha@staloysius.a...","+91 9739749703, +91 9495870533",1\n- linkedin.com/in/varenya\n- Big Data Analy...,Education Examination Board\n- QUALIFICATION S...
2,rachanaAlva.pdf,RACHANAALVA\nEmail:2117040rachana@staloysius.a...,2117040rachana@staloysius.ac,"+917034609485, +919495870533",inMobile:+917034609485\n- InstitutionYear\n- P...,Autonomous\n- Mangalore\n- Android\n- Duration...
3,RESUME_WincelGlany.pdf,\n \n WINCEL GLANY PAIS ...,"wincelpais@gmail.com, hemalatha@staloysius.ac....","7349418607, 09-07-2000, +91 9495870533, +91 82...",7349418607\n- Board/Unive \n\n- Big Data \nA...,Aloysius College\n- Pre University College\n- ...
4,RamyaCV.pdf,2 0 2 3\n2 0 2 1\n2 0 1 8\n2 0 1 6\nR a m y a ...,,"6 7 1 1 2 4, 9 6 3 3 2 8 8, 9 7 4 1 9 6 5, 9 7...",3\n- 1\n- 8\n- 6\n- H\n- s h e r\n- l\n- l\n- ...,
5,cleona resume 1.pdf,\n \nCLEONA J MONTEIRO \nEmail: cleonamonte...,"cleonamonteiro2000@gmail.com, ruban@staloysius...","9108126684, 2016 -2017, 16-04-2000, +91 974196...",Software\n- INSTITUTION UNIVERSITY \nBOARD ...,IBM Developer Skills\n- Developer Skills Netwo...


In [15]:
df.to_csv(output_csv)

In [16]:
print(df['Keywords'][3])

Aloysius College
- Pre University College
- GLANY PAIS Email
- Passing Grade Percentage
- Exploratory Data Analysis
- Big Data Analytics
- Great Learning PROJECT
- Mangalore University
- Machine Learning
- Tableau Machine Learning
Aloysius College
- Pre University College
- GLANY PAIS Email
- Passing Grade Percentage
- Exploratory Data Analysis
- Big Data Analytics
- Great Learning PROJECT
- Mangalore University
- Machine Learning
- Tableau Machine Learning
Aloysius College
- Pre University College
- GLANY PAIS Email
- Passing Grade Percentage
- Exploratory Data Analysis
- Big Data Analytics
- Great Learning PROJECT
- Mangalore University
- Machine Learning
- Tableau Machine Learning
