In [None]:
from PyPDF2 import PdfReader
import openai
import datetime
from docx import Document
from docx.shared import Pt, Cm

import os

# Run the rest of the notebook from the parent of the notebook's own directory,
# presumably the project root where `utils`, `main` and the relative paths used
# below (files/, prompts/, results/) live.
# The starting directory is remembered the first time this cell runs: a bare
# os.chdir('..') would climb one level higher on every re-run in the same kernel.
if '_NOTEBOOK_START_DIR' not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))

import utils
import main
# NOTE(review): utils.open_file is a project helper; confirm it strips the trailing
# newline, otherwise the key may be sent with whitespace attached.
openai.api_key = utils.open_file('openai_api_key.txt')

# CV

## PDF extraction

In [None]:
# Open the CV with PyPDF2
reader = PdfReader('files/cv.pdf')

# Report how many pages the file contains
print(len(reader.pages))

# NOTE(review): only the first page is read; the remaining pages of a
# multi-page CV are ignored by this cell.
page = reader.pages[0]

# Extract the page text, trim surrounding whitespace, then swap 'É' for 'E'
text = page.extract_text()
text = text.strip()
text = text.replace('É', 'E')
text

## Content extraction

In [None]:
# Seed conversation: the CV text is injected in the first user turn, and the
# scripted assistant turns lead up to the output-format request sent below.
cv_discussion = [
    {
        "role": "system",
        "content": (
            "You are a smart CV parser, you extract specific elements from curriculum vitae and output them in a specific format. "
            "When asked, output the desired format only, no human response."
        ),
    },
    {
        "role": "user",
        "content": f"Here is the CV that you are going to parse: <<{text}>>",
    },
    {
        "role": "assistant",
        "content": "What specific elements do you want me to extract?",
    },
    {
        "role": "user",
        "content": "I want you to extract the professional experiences and the tasks/missions performed at each position",
    },
    {
        "role": "assistant",
        "content": "What is the expected output format?",
    },
]

# Output template for the professional experiences, one literal per line of the template
experience_prompt = (
    "Expected output format:\n"
    "[Company name]: [Job title]\n"
    "- Duration: [Start date] - [End date]\n"
    "- Tasks:\n"
    "  - [Task description 1]\n"
    "  - [Task description 2]\n"
    "  - [Task description 3]\n"
    "...\n"
    "\n"
    "Answer the formatted output only, add nothing else, be exhaustive and copy paste the missions descriptions as they are. "
    "Freelance experiences count (skip the [Company Name] part). Do it now."
)

# gpt3_chat hands back the reply together with a messages list, which replaces cv_discussion
experience, cv_discussion = utils.gpt3_chat(
    message=experience_prompt,
    messages=cv_discussion,
)
print(experience)

In [None]:
# Same request for the education section, continuing the running CV conversation
education_prompt = (
    "Perfect. Do the same with the education part now. Expected output format:\n"
    "[School Name]:\n"
    "- Duration: [Start date] - [End date]\n"
    "- Degree: [Degree]\n"
    "- [Courses]: [List courses]\n"
    "...\n"
    "\n"
    "Answer the formatted output only, add nothing else."
)
education, cv_discussion = utils.gpt3_chat(
    message=education_prompt,
    messages=cv_discussion,
)
print(education)

In [None]:
# Ask for the candidate's name alone, then pass the reply through utils.clean_word_output
name_prompt = (
    "Perfect. What is the name on the CV? Just answer the name, nothing else. "
    "For example if the name is 'Bill Gates', just output 'Bill Gates'"
)
name, cv_discussion = utils.gpt3_chat(
    message=name_prompt,
    messages=cv_discussion,
)
name = utils.clean_word_output(name)
print(name)

In [None]:
# Ask whether the candidate graduates this year.
# The year comes from the clock instead of being hard-coded (the prompt used to
# always ask about 2023), and two typos in the prompt text are fixed
# ("candidate graduating" -> "candidate is graduating", "ouput" -> "output").
# The 'Not graduating' sentinel is unchanged because a later cell matches on it.
graduation_year = datetime.datetime.now().year
degree_and_school_names_prompt = (
    f"If the candidate is graduating from a school in {graduation_year}, "
    "output '[Degree] at [School Name]' only. "
    "If it's not the case, just output 'Not graduating'"
)
degree_and_school_names, cv_discussion = utils.gpt3_chat(message=degree_and_school_names_prompt, messages=cv_discussion)
print(degree_and_school_names)

In [None]:
# Ask for the ongoing position, if any; 'Not working' is the sentinel a later cell matches on
current_position_prompt = (
    "If the candidate is currently working (i.e. if a professional experience is still ongoing), "
    "output '[Job title] at [Company name]' only. "
    "If it's not the case, just output 'Not working'."
)
current_position, cv_discussion = utils.gpt3_chat(
    message=current_position_prompt,
    messages=cv_discussion,
)
current_position

In [None]:
# The model answered with a sentinel phrase when the condition does not hold;
# a flag is True when that phrase is absent (case-insensitive search).
is_gratuating = degree_and_school_names.lower().find('not graduating') == -1
print(is_gratuating)

is_working = current_position.lower().find('not working') == -1
print(is_working)

# Job offer

## Scraping

In [None]:
import requests
from bs4 import BeautifulSoup

In [None]:
# Job offer to scrape; the commented URL is an alternative offer kept for quick switching.
# url = "https://www.welcometothejungle.com/fr/companies/ouicar/jobs/senior-data-scientist_paris?q=ef63906a45dcc458384ffbf2f2dbf3b9&o=1766155"
url = "https://lydiasolutions.teamtailor.com/jobs/2605845-data-scientist"
# requests applies no timeout by default, so an unresponsive host would hang the cell forever
page = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were the job description
page.raise_for_status()

In [None]:
# Parse the downloaded HTML bytes with the 'html.parser' backend
soup = BeautifulSoup(page.content, 'html.parser')

In [None]:
# Join the text of every <p> element with spaces, leaving out any paragraph
# that contains the (lowercase) word 'cookie'
job_desc = ' '.join(
    el.text
    for el in soup.find_all('p')
    if 'cookie' not in el.text
)

In [None]:
# Display the scraped text so it can be checked by eye before it is put into the prompt
job_desc

## Content extraction

In [None]:
# Seed conversation for the job offer: the scraped text goes in the first user turn
offer_discussion = [
    {
        "role": "system",
        "content": (
            "You are a smart Job Description Parser. "
            "You extract specific elements from job offers and output them in a specific format. "
            "You answer in English only, and translate if needed."
        ),
    },
    {
        "role": "user",
        "content": f"Here is the job description that you are going to parse: <<{job_desc}>>",
    },
    {
        "role": "assistant",
        "content": "What specific elements do you want me to extract?",
    },
]

# Three-point summary of the job's missions
missions_prompt = (
    "Extract, sum up and list the 3 main missions of this job. "
    "Answer in English only and go straight to the 3 points, no introduction sentence."
)
missions, offer_discussion = utils.gpt3_chat(
    message=missions_prompt,
    messages=offer_discussion,
)
print(missions)

In [None]:
# Ask for the bare job title, then pass the reply through utils.clean_word_output
job_title_prompt = (
    "Extract the job title (if the job title is not clearly mentioned, imagine what it could be). "
    "Output the job title only, do not make a sentence. "
    "For example, if the job title is Marketing Lead, you must output 'Marketing Lead' only."
)
job_title, offer_discussion = utils.gpt3_chat(
    message=job_title_prompt,
    messages=offer_discussion,
)
job_title = utils.clean_word_output(job_title)
job_title

In [None]:
# Ask for the bare company name ('XXX' when the offer does not state it), then clean the reply
company_name_prompt = (
    "Extract the name of the company. "
    "If the company name is not clearly mentioned, anwser 'XXX'. "
    "Output the company name only, do not make a sentence. "
    "For example, if the company name is Microsoft, you must output 'Microsoft' only."
)
company_name, offer_discussion = utils.gpt3_chat(
    message=company_name_prompt,
    messages=offer_discussion,
)
company_name = utils.clean_word_output(company_name)
company_name

In [None]:
# Three reasons to apply, phrased around the job title and company extracted above
motivations_prompt = (
    f"Extract, sum up and list the 3 main reasons why it is an amazing opportunity for the candidate to work as a {job_title} at {company_name}. "
    "Insist on the company's strengths (ex: leader in its domain, great corporate culture, fast-growing environment) and on its mission. "
    "Answer in English only and go straight to the 3 points, no introduction sentence."
)
motivations, offer_discussion = utils.gpt3_chat(
    message=motivations_prompt,
    messages=offer_discussion,
)
print(motivations)

# Cover Letter

## Create blocks

In [None]:
# Export checkpoints
# checkpoints = {
#     'experience': experience,
#     'education': education,
#     'name': name,
#     'degree_and_school_names': degree_and_school_names,
#     'current_position': current_position,
#     'is_gratuating': is_gratuating,
#     'is_working': is_working,
#     'missions': missions,
#     'job_title': job_title,
#     'company_name': company_name,
#     'motivations': motivations
# }

# def write_checkpoint(file_name, text):
#     with open(f"checkpoints/{file_name}.txt", 'w') as outfile:
#         outfile.write(text)

# for k, v in checkpoints.items():
#     write_checkpoint(k, str(v))

In [None]:
# # Load checkpoints
# experience = utils.open_file("checkpoints/experience.txt")
# education = utils.open_file("checkpoints/education.txt")
# name = utils.open_file("checkpoints/name.txt")
# degree_and_school_names = utils.open_file("checkpoints/degree_and_school_names.txt")
# current_position = utils.open_file("checkpoints/current_position.txt")
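# # NOTE: the flags were written with str(), and bool() of any non-empty string
# # (including "False") is True, so compare the stored text instead of calling bool().
# is_gratuating = utils.open_file("checkpoints/is_gratuating.txt").strip() == 'True'
# is_working = utils.open_file("checkpoints/is_working.txt").strip() == 'True'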
# missions = utils.open_file("checkpoints/missions.txt")
# job_title = utils.open_file("checkpoints/job_title.txt")
# company_name = utils.open_file("checkpoints/company_name.txt")
# motivations = utils.open_file("checkpoints/motivations.txt")

In [None]:
# Placeholder -> value table handed to utils.fill_prompt for every prompt template.
# The first seven entries come from the CV, the last four from the job offer.
prompt_filling_dict = {
    '<<EXPERIENCE>>': experience,
    '<<EDUCATION>>': education,
    '<<NAME>>': name,
    '<<DEGREE_AND_SCHOOL_NAMES>>': degree_and_school_names,
    '<<CURRENT_POSITION>>': current_position,
    '<<IS_GRATUATING>>': is_gratuating,
    '<<IS_WORKING>>': is_working,
    '<<MISSIONS>>': missions,
    '<<JOB_TITLE>>': job_title,
    '<<COMPANY_NAME>>': company_name,
    '<<MOTIVATIONS>>': motivations,
}

In [None]:
# Date
now = datetime.datetime.now()
date_block = f"{now.strftime('%B')} {now.strftime('%d')}, {now.strftime('%Y')}"
print(date_block)

In [None]:
# Subject
# Unlike the paragraph cells below, no model call happens here: the template file is
# read and passed straight to utils.fill_prompt with the placeholder table.
# NOTE(review): fill_prompt/open_file live in utils; the substitution behaviour is presumed from their names.
prompt_path = "prompts/cl_subject.txt"
subject_block = utils.fill_prompt(utils.open_file(prompt_path), prompt_filling_dict)
print(subject_block)


In [None]:
# Fixed opening line of the letter (not generated by the model)
salutations_block = 'Dear Sir or Madam,'

In [None]:
# Intro
# Choose the intro template from the two status flags, then let the model write the paragraph.
if is_gratuating:
    prompt_path = "prompts/cl_intro_graduate_working.txt" if is_working else "prompts/cl_intro_graduate.txt"
else:
    prompt_path = "prompts/cl_intro_working.txt" if is_working else "prompts/cl_intro_standard.txt"

prompt = utils.fill_prompt(
    utils.open_file(prompt_path),
    prompt_filling_dict,
)
print(prompt)

intro_block = utils.gpt3_completion(prompt, temp=0, tokens=100)
print(intro_block)


In [None]:
# Motivations
# Fill the template, generate the paragraph, then drop every newline from the completion.
prompt_path = "prompts/cl_motivations.txt"
prompt = utils.fill_prompt(
    utils.open_file(prompt_path),
    prompt_filling_dict,
)
print(prompt)

motivations_block = utils.gpt3_completion(prompt, temp=0, tokens=300)
motivations_block = motivations_block.replace('\n', '')
print(motivations_block)

In [None]:
# Experiences
# A fixed transition sentence (with its placeholders filled in) is prepended
# to the generated paragraph.
prompt_path = "prompts/cl_experiences.txt"
experience_transition = utils.fill_prompt(
    'I am confident that my previous experiences demonstrate my alignment with these three motivations and explain my eagerness to work as <<JOB_TITLE>> at <<COMPANY_NAME>>. ',
    prompt_filling_dict,
)

prompt = utils.fill_prompt(
    utils.open_file(prompt_path),
    prompt_filling_dict,
)
print(prompt)

experiences_block = utils.gpt3_completion(prompt, temp=0, tokens=300)
experiences_block = experience_transition + experiences_block.replace('\n', '')
print(experiences_block)

In [None]:
# Education
# Fill the template, generate the paragraph, then drop every newline from the completion.
prompt_path = "prompts/cl_education.txt"
prompt = utils.fill_prompt(
    utils.open_file(prompt_path),
    prompt_filling_dict,
)
print(prompt)

education_block = utils.gpt3_completion(prompt, temp=0, tokens=300)
education_block = education_block.replace('\n', '')
print(education_block)

In [None]:
# Missions
# Fill the template, generate the paragraph, then drop every newline from the completion.
prompt_path = "prompts/cl_missions.txt"
prompt = utils.fill_prompt(
    utils.open_file(prompt_path),
    prompt_filling_dict,
)
print(prompt)

missions_block = utils.gpt3_completion(prompt, temp=0, tokens=300)
missions_block = missions_block.replace('\n', '')
print(missions_block)

In [None]:
# Greeting
# No model call here: the template file is read and passed to utils.fill_prompt
# with the placeholder table, and the result is used as the paragraph.
prompt_path = "prompts/cl_greeting.txt"
greeting_block = utils.fill_prompt(utils.open_file(prompt_path), prompt_filling_dict)
print(greeting_block)

In [None]:
# Closing
# Fixed sign-off line placed above the signature (not generated by the model)
closing_block = 'Yours sincerely,'

## Assemble blocks

In [None]:
# CL
# Plain-text preview: every block is substituted into the body template and printed.
prompt_path = "prompts/cl_body.txt"
blocks = {
    '<<DATE>>': date_block,
    '<<SUBJECT>>': subject_block,
    '<<SALUTATIONS>>': salutations_block,
    '<<INTRO>>': intro_block,
    '<<MOTIVATIONS>>': motivations_block,
    '<<EXPERIENCES>>': experiences_block,
    '<<EDUCATION>>': education_block,
    '<<MISSIONS>>': missions_block,
    '<<GREETING>>': greeting_block,
    '<<CANDIDATE_NAME>>': name,
}

print(
    utils.fill_prompt(
        utils.open_file(prompt_path),
        blocks,
    )
)

In [None]:
# Build the cover letter as a Word document and save it under results/.
document = Document()

# Page margins, applied to every section of the document
sections = document.sections
for section in sections:
    section.top_margin = Cm(3)
    section.bottom_margin = Cm(2)
    section.left_margin = Cm(2.5)
    section.right_margin = Cm(2.5)

# Alignment codes: 2 and 3 are the integer values of python-docx's
# WD_ALIGN_PARAGRAPH.RIGHT and WD_ALIGN_PARAGRAPH.JUSTIFY.
date = document.add_paragraph(date_block)
date.alignment = 2
document.add_paragraph()  # blank line between the date and the subject

subject = document.add_paragraph()
subject.add_run(subject_block).bold = True
subject.alignment = 3

salutations = document.add_paragraph(salutations_block)

# Body paragraphs, in reading order, all justified
for block in [intro_block, motivations_block, experiences_block, education_block, missions_block, greeting_block]:
    p = document.add_paragraph(block)
    p.alignment = 3

closing = document.add_paragraph(closing_block)

document.add_paragraph()  # blank line before the signature
signature = document.add_paragraph(name)
signature.alignment = 2

# Font of the 'Normal' style
style = document.styles['Normal']
font = style.font
font.name = 'Times New Roman'
font.size = Pt(9.5)

# File-name slugs are stored under their own names. The cell used to overwrite
# job_title and company_name, so re-running any earlier cell afterwards would
# have put the underscore/lowercase slugs into the prompts and the letter text.
job_title_slug = job_title.replace(' ', '_').lower()
company_name_slug = company_name.replace(' ', '_').lower()
current_datetime = now.strftime('%Y%m%d_%H%M%S')
# Create the output folder if it is missing so that save() does not fail on it
os.makedirs('results', exist_ok=True)
document.save(f'results/cl_{company_name_slug}_{job_title_slug}_{current_datetime}.docx')

In [None]:
def create_cover_letter(cl_blocks):
    """Build the cover letter as a Word document, save it under results/ and return it.

    Args:
        cl_blocks: mapping that provides the text of each part of the letter under
            the keys 'date_block', 'subject_block', 'salutations_block',
            'intro_block', 'motivations_block', 'experiences_block',
            'education_block', 'missions_block', 'greeting_block' and
            'closing_block'.

    Returns:
        The docx ``Document`` that was written to disk.

    Raises:
        KeyError: if one of the keys listed above is missing from ``cl_blocks``.

    Note:
        The signature text and the output file name come from the notebook-level
        variables ``name``, ``job_title``, ``company_name`` and ``now``; they must
        be defined before this function is called.
    """
    document = Document()

    # Page margins, applied to every section of the document
    for section in document.sections:
        section.top_margin = Cm(3)
        section.bottom_margin = Cm(2)
        section.left_margin = Cm(2.5)
        section.right_margin = Cm(2.5)

    # Alignment codes: 2 and 3 are the integer values of python-docx's
    # WD_ALIGN_PARAGRAPH.RIGHT and WD_ALIGN_PARAGRAPH.JUSTIFY.
    date = document.add_paragraph(cl_blocks['date_block'])
    date.alignment = 2
    document.add_paragraph()  # blank line between the date and the subject

    subject = document.add_paragraph()
    subject.add_run(cl_blocks['subject_block']).bold = True
    subject.alignment = 3

    document.add_paragraph(cl_blocks['salutations_block'])

    # Body paragraphs, in reading order, all justified
    body_keys = ['intro_block', 'motivations_block', 'experiences_block',
                 'education_block', 'missions_block', 'greeting_block']
    for key in body_keys:
        p = document.add_paragraph(cl_blocks[key])
        p.alignment = 3

    document.add_paragraph(cl_blocks['closing_block'])

    document.add_paragraph()  # blank line before the signature
    signature = document.add_paragraph(name)
    signature.alignment = 2

    # Font of the 'Normal' style
    style = document.styles['Normal']
    font = style.font
    font.name = 'Times New Roman'
    font.size = Pt(9.5)

    # Slugs get their own local names. Assigning to `job_title` / `company_name`
    # inside the function made them local variables, so reading them on the
    # right-hand side raised UnboundLocalError on every call.
    job_title_slug = job_title.replace(' ', '_').lower()
    company_name_slug = company_name.replace(' ', '_').lower()
    current_datetime = now.strftime('%Y%m%d_%H%M%S')
    # Create the output folder if it is missing so that save() does not fail on it
    os.makedirs('results', exist_ok=True)
    document.save(f'results/cl_{company_name_slug}_{job_title_slug}_{current_datetime}.docx')
    return document


# Run all

In [None]:
# End-to-end run through the helpers in main.py: fetch both inputs, then sanity-check them.
url = "https://www.welcometothejungle.com/fr/companies/mirakl/jobs/senior-data-scientist_paris_MIRAK_Dj4bY8?q=4633b4d5fd725173ac7a70679fa76096&o=1249537"

print('scraping the url and extracting content...')
offer = main.extract_text_from_url(url)

print('extracting content from CV...')
cv = main.extract_text_from_pdf('files/cv.pdf')

# NOTE(review): a failed check only prints a message; nothing stops the
# following cells from running on the rejected input.
print('checking that the file provided is a CV and the url refers a job offer...')
if not main.is_a_cv(cv):
    print('Sorry buddy, but the CV you uploaded does not look like a CV 🤯')
elif not main.is_a_job_offer(offer):
    print('Sorry buddy, but the URL you provided does not look like job offer 🤯')

In [None]:
# Pass the CV text obtained in the previous cell to main.extract_content_from_CV.
# NOTE(review): the structure of cv_content is defined in main.py and is not visible here.
print('parsing the CV to extract specific content...')
cv_content = main.extract_content_from_CV(cv)

In [None]:
# Pass the scraped offer text to main.extract_content_from_offer, then combine both
# extraction results through main.create_prompt_filling_dict.
# NOTE(review): this rebinds prompt_filling_dict, replacing the dict built by hand earlier in the notebook.
print('parsing the job offer to extract specific content...')
offer_content = main.extract_content_from_offer(offer)
prompt_filling_dict = main.create_prompt_filling_dict(cv_content, offer_content)

In [None]:
# Last step: build the letter blocks from the placeholder table, then the Word document.
# The call goes to main.create_cover_letter, not to the create_cover_letter defined in this notebook.
print('creating the different cover letter blocks...')
cl_blocks = main.create_cover_letter_blocks(prompt_filling_dict)
print('creating the word document...')
document = main.create_cover_letter(cl_blocks)