# Workbench for ResumeAssistant

In [1]:
from pdf2image import convert_from_path
# ImageWorker takes in the path to jpg resume image and returns the contents of the resume in a json format. 
from resumeassistant.operator.worker import ImageWorker, TextWorker
from resumeassistant.data.candidate_record import Record
from openai import OpenAI
import json

# PDF to jpg image converter function
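This function takes the input path of the PDF resume and an output path prefix for the resume images. Each page is saved in 'jpg' format as `<prefix><page index>.jpg` (for example, the prefix `./resume` produces `./resume0.jpg` for the first page).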

In [2]:
def get_img(pdf_path, img_path):
    """Convert every page of a PDF resume into a JPEG image.

    Args:
        pdf_path: Path of the PDF file to convert.
        img_path: Output path *prefix*. Page ``i`` (0-based) is written to
            ``<img_path><i>.jpg``, e.g. ``./resume`` -> ``./resume0.jpg``.

    Returns:
        List of the image paths that were written, in page order, so callers
        do not have to rebuild the file names themselves.
    """
    pages = convert_from_path(pdf_path)
    saved_paths = []
    for page_no, page in enumerate(pages):
        # Same naming scheme as before: prefix + page index + '.jpg'.
        out_path = f"{img_path}{page_no}.jpg"
        page.save(out_path, 'JPEG')
        saved_paths.append(out_path)
    return saved_paths

This cell reads the OpenAI API key stored in the `gpt_key.json` file. \
Format: \
    ```{ 
	"api_key": \<open_ai_key\>
    }```

In [3]:
# Load the OpenAI API key from a local JSON file so the secret is never
# written into the notebook itself.
# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's locale default.
with open('gpt_key.json', 'r', encoding='utf-8') as file:
    data = json.load(file)
# The file is fully parsed at this point, so the handle can already be closed.
api_key = data["api_key"]

## Resume Parsing
This is the code for converting the PDF resume to an image and parsing it using the `gpt-4o-mini` model. Only the first page image is parsed.

In [4]:
# Input PDF and the prefix get_img uses for the per-page JPEGs it writes
# (page i is saved as prefix + str(i) + '.jpg').
resume_pdf_path = 'Harish_Resume_PhD.pdf'
resume_img_prefix = './Harish_Resume'

client = OpenAI(api_key=api_key)
get_img(resume_pdf_path, resume_img_prefix)

sys_prompt = "You are a resume parser. Given the image of the resume, parse every section and provide the output in a JSON format."
user_prompt = "Can you parse all sections of this resume?"

image_worker = ImageWorker(client=client, model_id="gpt-4o-mini", sys_prompt=sys_prompt, user_prompt=user_prompt)
# Only the first page (index 0) is parsed. Its path is derived from the same
# prefix that was handed to get_img, so the two can no longer drift apart.
parsed_resume = image_worker.get_output(resume_img_prefix + '0.jpg')
print(parsed_resume)

{'name': 'Harish Sista', 'contact_info': {'phone': '(201) 310-5683', 'email': 'hsista@stevens.edu', 'address': '2642 28th St, Apt 4F, Astoria, NY-11102'}, 'summary': 'Ph.D. Candidate researching in ML and NLP with 7+ years of experience working with ML models, Big Data, and Cloud Computing platforms and 2+ years of experience working in a professional environment. Spearheaded an Evidence Extraction and Fact Verification model using Prompt Engineering and tested it with various LLMs. Invented a Black Box Optimization model for online hyperparameter fine-tuning, significantly enhancing model performance in real-time applications.', 'skills': {'Programming_Languages': ['Python', 'SQL', 'C++', 'Java', 'Swift', 'HTML'], 'AI_Tools': ['PyTorch', 'HuggingFace', 'TensorFlow', 'AutoML', 'SciKitLearn', 'NumPy'], 'LLMs': ['GPT', 'LLAMA', 'BLOOM', 'BERT'], 'Data_Engineering': ['AWS EC2', 'FireWorks AI', 'Lightning AI', 'Data Pipelines', 'Data Augmentation'], 'Cloud_Platforms': ['AWS', 'Google Cloud

## Add CV and additional information
The current_cv instance keeps a record of the most recently updated cover letter and additional information on work experience, projects, achievements, and certifications.

### Input arguments:
`resume`=Parsed resume (Dict). \
`cv` = Cover letter text. \
`add_skills` = Additional skills not mentioned in the resume (Dict). \
`add_work_experience` = Additional work experience not mentioned in the resume (Dict). \
`add_education` = Additional education not mentioned in the resume (Dict). \
`add_projects` = Additional Projects not mentioned in the resume (Dict). \
`add_achievements` = Additional achievements not mentioned in the resume (Dict). \
`add_certifications` = Additional certifications not mentioned in the resume (Dict). 

#### additional_information format:
```{``` \
```"information": text,``` \
```"organization": text,``` \
```"date": "dd/mm/yyyy - dd/mm/yyyy" (or) "dd/mm/yyyy"``` \
```}```

The job_listing instance keeps a record of the requirements and description of the job posting. 

In [5]:
# Show the top-level keys of the parsed resume returned by the image worker.
print(parsed_resume.keys())

dict_keys(['name', 'contact_info', 'summary', 'skills', 'education', 'research_experience', 'work_experience', 'certifications'])


In [10]:
# Wrap the parsed resume in a Record and read the target job posting.
record = Record(resume=parsed_resume)
with open('job_description.txt') as f:
    job_description = f.read()

# System-role instructions for the optimizer. Adjacent literals are joined by
# the parser into a single string.
ro_system_prompt = (
    'You are a resume optimizer. '
    'Given the current resume(#current_resume) and the job description(#job_description), '
    'rewrite the current resume to highlight the relevant skills, experience, and quantifiable achievements that match the job description. '
    'Provide the output in a JSON format.'
)

# User-role message: the request followed by the two tagged payloads.
ro_user_prompt = (
    'Please optimize my current resume '
    'The current resume is provided under #current_resume '
    'and job description is provided under #job_description '
    f'\n\t#current_resume: {record.resume} '
    f'\n\t#job_description: {job_description}'
)

ro_text_worker = TextWorker(
    client=client,
    model_id="gpt-4o-mini",
    sys_prompt=ro_system_prompt,
    user_prompt=ro_user_prompt,
    json_format=True,
)
new_resume = ro_text_worker.get_output()
print(new_resume)

{'name': 'Harish Sista', 'contact_info': {'phone': '(201) 310-5683', 'email': 'hsista@stevens.edu', 'address': '2642 28th St, Apt 4F, Astoria, NY-11102'}, 'summary': 'Ph.D. Candidate specializing in Machine Learning and Generative AI with over 7+ years of experience in building and optimizing ML models, data pipelines, and AI-driven applications. Proven track record of developing innovative solutions through rapid prototyping, feature engineering, and collaboration with cross-functional teams. Committed to leveraging cutting-edge technology to transform operations and enhance user experience.', 'skills': {'Programming_Languages': ['Python', 'SQL', 'C++', 'Java'], 'AI_Tools': ['PyTorch', 'HuggingFace', 'TensorFlow', 'AutoML', 'SciKitLearn', 'NumPy'], 'Data_Engineering': ['AWS EC2', 'Data Pipelines', 'Data Augmentation', 'Snowflake', 'Automated Workflows'], 'LLMs': ['GPT', 'LLAMA', 'BLOOM', 'BERT'], 'Cloud_Platforms': ['AWS', 'Google Cloud', 'GIT', 'Heroku'], 'Generative_AI': ['Prompt En

## Resume Screening
In this section, the generated resume is screened based on how informative it is with respect to the job posting.
The resume screener gathers the keywords from the job description that the new resume fails to address and, for each keyword, a question requesting the missing information needed to address the unfulfilled requirement.


In [12]:
# System-role instructions for the screener.
# The prompt is built from adjacent string literals inside parentheses rather
# than backslash continuations inside one literal: with a backslash, the
# indentation of each continued source line becomes part of the string and is
# sent to the model as a long run of spaces at every line break.
ra_system_prompt = (
    'You are a resume screener and a question generator. '
    'given the current resume(#current_resume) and the job description(#job_description), '
    'provide keywords from the job description where the resume fails to address the requirements. '
    'Provide the keywords with a question describing how the resume fails to meet this requirement. '
    'Present the output in the following JSON output format: '
    '\n\t{"keywords": [{"keyword": system keyword, "question": system keyword question}]}'
)

# The optimized resume (new_resume) is screened here. To screen the original
# resume instead, interpolate record.resume in place of new_resume.
ra_user_prompt = (
    f'#current_resume: {new_resume}\n'
    f'#job_description: {job_description}'
)

ra_text_worker = TextWorker(client=client, model_id="gpt-4o-mini", sys_prompt=ra_system_prompt, user_prompt=ra_user_prompt, json_format=True)
system_insights = ra_text_worker.get_output()
print(system_insights)

{'keywords': [{'keyword': '1-3 years of experience working on machine learning or AI projects', 'question': 'How does your experience align with the requirement for 1-3 years of experience specifically working on machine learning or AI projects, with a focus on model feature set preparation?'}, {'keyword': 'Proficiency with Snowflake', 'question': 'Can you elaborate on your experience with Snowflake or similar platforms for large-scale data management and pipeline creation, as this is mentioned as a preference in the job requirements?'}, {'keyword': 'AutoML tools and automated feature engineering techniques', 'question': 'What specific experience or familiarity do you have with AutoML tools and automated feature engineering techniques to support rapid model development, as highlighted in the job description?'}, {'keyword': 'Experience using AI tools in Snowflake', 'question': 'Have you had any experience using AI tools in Snowflake, like Cortex, for transforming data and/or feature eng