Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Added the functionality to fetch all the Experiences and Educations" #5

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 83 additions & 118 deletions resumeparser/RP_RestAPI/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,172 +10,137 @@
from pdfminer.image import ImageWriter
import io


def parse(resume):
imagewriter = None
caching = True
laparams = LAParams()
retstr = io.StringIO()
rsrcmgr = PDFResourceManager(caching=caching)
device = TextConverter(rsrcmgr, retstr, laparams=laparams, imagewriter=imagewriter)
device = TextConverter(rsrcmgr, retstr, laparams=laparams,imagewriter=imagewriter )
data = []
skills = []
languages = []
summary = []
certifications = []
contact = []
linkedin = []
experience = []
education = []
complete_experience = []
complete_education = []
exp_dict = {}
edu_dict = {}
alld = {}
skills=[]
languages=[]
summary=[]
certifications=[]
contact=[]
linkedin=[]
experience=[]
education=[]
exp_dict={}
edu_dict={}
alld={}

interpreter = PDFPageInterpreter(rsrcmgr, device)
for page in PDFPage.get_pages(resume, caching=caching, check_extractable=True):
for page in PDFPage.get_pages(resume,caching=caching, check_extractable=True):
interpreter.process_page(page)
data = retstr.getvalue()

weird = ["\xa0", "\uf0da", "\x0c", "• ", "* ", "(LinkedIn)", " (LinkedIn)", "\uf0a7", "(Mobile)", "- ", "●", "Page 1 of 4", "Page 2 of 4", "Page 3 of 4", "Page 4 of 4"]
weird=["\xa0","\uf0da","\x0c","• ","* ","(LinkedIn)"," (LinkedIn)","\uf0a7","(Mobile)","- ","●"]
for i in weird:
data = data.replace(i, "")
data=data.replace(i, "")

result_list=data.split('\n')

result_list = data.split('\n')
lengthOfResultArray = result_list.__len__()
for i in result_list:
if i == 'Contact':
value = result_list.index(i)
if i=='Contact':
value=result_list.index(i)
while True:
value = value + 1
value=value+1
contact.append(result_list[value].strip())
if result_list[value] == '':
if result_list[value] =='':
contact.remove(result_list[value])
break

if i.__contains__('www.linkedin.com'):
value = result_list.index(i)
while True:
value=result_list.index(i)
while True:
linkedin.append(result_list[value])
value = value + 1
if result_list[value] == '':
value=value+1
if result_list[value] =='':
break
if len(linkedin) >= 2:
ln = []
merged = linkedin[0] + linkedin[1].strip()
if len(linkedin)>=2:
ln=[]
merged=linkedin[0]+linkedin[1].strip()
ln.append(merged)
linkedin = ln

if i == 'Top Skills':
value = result_list.index(i)
linkedin=ln
if i=='Top Skills':
value=result_list.index(i)
while True:
value = value + 1
value=value+1
skills.append(result_list[value])
if result_list[value] == '':
if result_list[value] =='':
skills.remove(result_list[value])
break

if i.__contains__('Certifications'):
value = result_list.index(i)
value=result_list.index(i)
while True:
value = value + 1
value=value+1
certifications.append(result_list[value])
if result_list[value] == '':
if result_list[value] =='':
certifications.remove(result_list[value])
break

if i.__contains__('Summary'):
value = result_list.index(i)
value=result_list.index(i)
while True:
value = value + 1
value=value+1
summary.append(result_list[value])
if result_list[value] == '':
if result_list[value] =='':
summary.remove(result_list[value])
break

if i == 'Languages':
value = result_list.index(i)
if i=='Languages':
value=result_list.index(i)
while True:
value = value + 1
value=value+1
languages.append(result_list[value])
if result_list[value] == '':
if result_list[value] =='':
languages.remove(result_list[value])
break

if i == 'Experience':
value = result_list.index(i)
value = value + 2


if i=='Experience':
value=result_list.index(i)
value=value+2
while True:
# The following condition checks whether we have reached the end of the file; this is necessary in case this section is the last section.
if (value >= lengthOfResultArray - 1):
experience.append(result_list[value])
value=value+1
a=str(result_list[value])
if a.__contains__('-'):
k=a.split('-')
# print('start:',k[0],'end:',k[1])
break
# The following condition checks whether we have encountered another section heading, which means this section has finished.
if (str(result_list[value]) == "Contact" or str(result_list[value]) == "Top Skills" or str(
result_list[value]) == "Certifications" or str(result_list[value]) == "Summary" or str(
result_list[value]) == "Languages" or str(result_list[value]) == "Education"):
elif result_list[value] =='':
break
if (result_list[value] == ''):
value += 1
experience = []
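# The following condition checks whether this line starts an experience entry: the previous line is empty, the next line is non-empty, and the line after that contains "-" (the period). The four lines of an entry are then: Name of Company, Position, Period, and Location/Place, respectively.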
elif (result_list[value - 1] == "" and result_list[value + 1] != "" and result_list[
value + 2].__contains__("-")):
#If the above condition is true, we can fetch this experience object and save it in complete_experience array.
experience.append(result_list[value]) #Company Name
experience.append(result_list[value + 1]) #Job Title
experience.append(result_list[value + 2]) #Period
experience.append(result_list[value + 3]) #Place
listOfExp = ["company", "position", "period", "place"]
zipbObj = zip(listOfExp, experience)
exp_dict = dict(zipbObj)
complete_experience.append(exp_dict)
experience = []
# As we have fetched 4 indexes in the code above, and we know the value at the fifth index can be either a description or an empty line,
# we increment the counter by 5.
value += 5
else:
value += 1

listOfExp = ["company", "position","period","place","description" ]
zipbObj = zip(listOfExp, experience)
exp_dict = dict(zipbObj)
exp_dict['startdate']=k[0]
exp_dict['enddate']=k[1]

if i == 'Education':
value = result_list.index(i)
value = value + 1
index = 0;
if i=='Education':
value=result_list.index(i)
value=value+1
while True:
# The following condition checks whether we have reached the end of the file; this is necessary in case this section is the last section.
if (value >= lengthOfResultArray - 1):
education.append(result_list[value])
value=value+1
if result_list[value] =='':
break
# The following condition checks whether we have encountered another section heading, which means this section has finished.
if (str(result_list[value]) == "Contact" or str(result_list[value]) == "Top Skills" or str(
result_list[value]) == "Certifications" or str(result_list[value]) == "Summary" or str(
result_list[value]) == "Languages" or str(result_list[value]) == "Experience"):
break
if result_list[value] == '':
value = value + 1
else:
education.append(result_list[value])
value = value + 1
index += 1
if (index == 2):
#When we have fetched the 2 values(school & degree) in the education array, we can now create an education object from this array
listOfEdu = ["school", "degree"]
zipbObj = zip(listOfEdu, education)
edu_dict = dict(zipbObj)
#Save the education object in complete_education array. This complete_education array will have all the education objects
complete_education.append(edu_dict)
index = 0
education = []
listOfEdu = ["school", "degree" ]
zipbObj = zip(listOfEdu, education)
edu_dict = dict(zipbObj)

alld['contact'] = contact
alld['skills'] = skills
alld['linkedin'] = linkedin[0]
alld['skills'] = skills
alld['certifications'] = certifications
alld['summary'] = summary
alld['languages'] = languages
alld['experience'] = complete_experience
alld['education'] = complete_education
alld['contact']=contact
alld['skills']=skills
alld['linkedin']=linkedin[0]
alld['skills']=skills
alld['certifications']=certifications
alld['summary']=summary
alld['languages']= languages
alld.update(edu_dict)
alld.update(exp_dict)
device.close()
retstr.close()

Expand Down