# Workbench for ResumeAssistant

In [None]:
from pdf2image import convert_from_path
# ImageWorker takes in the path to jpg resume image and returns the contents of the resume in a json format. 
from resumeassistant.operator.worker import ImageWorker, TextWorker
from resumeassistant.data.candidate_record import Record, QA_Knowledge
from openai import OpenAI
import json

# PDF to jpg image converter function
This function takes the path of the PDF resume and an output path prefix for the resume images. Each page is saved in JPEG format as `<img_path><page_index>.jpg` (for example, `./Harish_Resume0.jpg` for the first page). 

In [None]:
def get_img(pdf_path, img_path):
    """Convert each page of a PDF resume into a JPEG image file.

    Args:
        pdf_path: Path of the PDF file to convert.
        img_path: Output path prefix. The page at 0-based index ``i`` is
            written to ``<img_path><i>.jpg``.

    Returns:
        The list of image file paths that were written, in page order.
    """
    saved_paths = []
    # Each element returned by convert_from_path is saved as its own image file.
    for page_no, page in enumerate(convert_from_path(pdf_path)):
        out_path = f"{img_path}{page_no}.jpg"
        page.save(out_path, 'JPEG')
        saved_paths.append(out_path)
    return saved_paths

This cell loads the OpenAI API key stored in the `gpt_key.json` file. \
Format: \
    ```{ 
	"api_key": \<open_ai_key\>
    }```

In [None]:
# Load the OpenAI API key from the local `gpt_key.json` credentials file.
with open('gpt_key.json', 'r') as file:
    data = json.load(file)
# The lookup does not need the file handle, so it happens after the file is closed.
api_key = data["api_key"]

## Resume Parsing
This cell converts the PDF resume to JPEG images and parses the first page image using "gpt-4o-mini". 

In [None]:
# Create the OpenAI client and render the PDF resume to per-page JPEG files.
client = OpenAI(api_key=api_key)
get_img('Resume_PhD.pdf', './Harish_Resume')

# Prompts given to the image-based resume parser.
sys_prompt = "You are a resume parser. Given the image of the resume, parse every section and provide the output in a JSON format."
user_prompt = "Can you parse all sections of this resume?"

# Only the page-0 image written by get_img is passed to the parser.
image_worker = ImageWorker(
    client=client,
    model_id="gpt-4o-mini",
    sys_prompt=sys_prompt,
    user_prompt=user_prompt,
)
parsed_resume = image_worker.get_output('Harish_Resume0.jpg')
print(parsed_resume)

## Add CV and additional information
The current_cv instance keeps a record of the most recently updated cover letter and additional information on work experience, projects, achievements, and certifications. 

### Input arguments:
`resume`=Parsed resume (Dict). \
`cv` = Cover letter text. \
`add_skills` = Additional skills not mentioned in the resume (Dict). \
`add_work_experience` = Additional work experience not mentioned in the resume (Dict). \
`add_education` = Additional education not mentioned in the resume (Dict). \
`add_projects` = Additional Projects not mentioned in the resume (Dict). \
`add_achievements` = Additional achievements not mentioned in the resume (Dict). \
`add_certifications` = Additional certifications not mentioned in the resume (Dict). 

#### additional_information format:
```{``` \
```"information": text,``` \
```"organization": text,``` \
```"date": "dd/mm/yyyy - dd/mm/yyyy" (or) "dd/mm/yyyy"``` \
```}```

The job_listing instance keeps a record of the requirements and description of the job posting. 

In [None]:
# Show the top-level keys of the parsed resume.
print(parsed_resume.keys())

## Resume Optimizer

In [None]:
def generate_resume(candidate_info, job_description, *, model_id="gpt-4o-mini"):
    """Ask the LLM to rewrite the candidate's resume for a given job posting.

    Args:
        candidate_info: Candidate data; interpolated as-is into the user prompt.
        job_description: Job posting text; interpolated as-is into the user prompt.
        model_id: Model identifier forwarded to ``TextWorker``. Defaults to
            "gpt-4o-mini", the value that used to be hard-coded.

    Returns:
        The value returned by ``TextWorker.get_output()`` for a request made
        with ``json_format=True``.

    Note:
        Uses the notebook-level ``client``, so the cell that creates it must
        have been run first.
    """
    # NOTE: with backslash continuations inside a string literal, the leading
    # spaces of each continued line become part of the prompt text.
    ro_system_prompt = 'You are a resume optimizer. \
    Given the candidate information(#candidate_info) and the job description(#job_description), \
    rewrite the current resume to highlight the relevant skills, experience, and quantifiable achievements that match the job description. \
    Provide the output in a JSON format.'

    ro_user_prompt = f'#candidate_info: {candidate_info} \
                    \n#job_description: {job_description}'

    ro_text_worker = TextWorker(
        client=client,
        model_id=model_id,
        sys_prompt=ro_system_prompt,
        user_prompt=ro_user_prompt,
        json_format=True,
    )
    return ro_text_worker.get_output()
# print(new_resume)

## Resume Screening
In this section, the generated resume is screened based on how informative it is with respect to the job posting. 
The resume screener collects the keywords from the job description that the new resume fails to address, and pairs each keyword with a question requesting the missing information needed to meet the unfulfilled requirement. 


In [None]:
def screen_resume_CL(resume, cover_leter, job_description, *, model_id="gpt-4o-mini"):
    """Ask the LLM which job requirements the resume and cover letter miss.

    Args:
        resume: Resume content; interpolated as-is into the user prompt.
        cover_leter: Cover letter text; interpolated as-is into the user
            prompt. (The misspelled name is kept so existing keyword callers
            keep working.)
        job_description: Job posting text; interpolated as-is into the user prompt.
        model_id: Model identifier forwarded to ``TextWorker``. Defaults to
            "gpt-4o-mini", the value that used to be hard-coded.

    Returns:
        The value returned by ``TextWorker.get_output()`` for a request made
        with ``json_format=True``. The system prompt asks for the shape
        ``{"keywords": [{"keyword": ..., "question": ...}]}``.

    Note:
        Uses the notebook-level ``client``, so the cell that creates it must
        have been run first.
    """
    # NOTE: with backslash continuations inside a string literal, the leading
    # spaces of each continued line become part of the prompt text.
    ra_system_prompt = 'You are a resume and cover letter screener.\
                        given the current resume(#current_resume), cover letter(#current_cover_letter) and the \
                        job description(#job_description), \
                        provide keywords from the job description where the resume and the cover letter fail to address the requirements. \
                        Provide the keywords with a question describing how the resume and the cover letter fail to meet this requirement. \
                        Present the output in the following JSON output format: \
                        \n\t{\"keywords\": [{"keyword": system keyword, "question": system keyword question}]}'

    ra_user_prompt = f'#current_resume: {resume} \
                        #job_description: {job_description} \
                        #current_cover_letter: {cover_leter}' 

    ra_text_worker = TextWorker(
        client=client,
        model_id=model_id,
        sys_prompt=ra_system_prompt,
        user_prompt=ra_user_prompt,
        json_format=True,
    )
    return ra_text_worker.get_output()
# print(system_insights)

## Cover Letter Generation
This is the code for generating the cover letter: 

In [None]:
def CL_generator(candidate_info, job_description, *, model_id="gpt-4o-mini"):
    """Ask the LLM to write a cover letter tailored to a job posting.

    Args:
        candidate_info: Candidate data; interpolated as-is into the user prompt.
        job_description: Job posting text; interpolated as-is into the user prompt.
        model_id: Model identifier forwarded to ``TextWorker``. Defaults to
            "gpt-4o-mini", the value that used to be hard-coded.

    Returns:
        The value returned by ``TextWorker.get_output()``. Unlike the resume
        helpers, ``json_format`` is not passed here.

    Note:
        Uses the notebook-level ``client``, so the cell that creates it must
        have been run first.
    """
    # NOTE: with backslash continuations inside a string literal, the leading
    # spaces of each continued line become part of the prompt text.
    cl_system_prompt = "You are a cover letter generator, given the candidate information(#candidate_info) and job description(#job_description). \
                        Generate a cover letter relevant to the job description highlighting the candidate's skills and achievements"

    cl_user_prompt = f'#candidate_info: {candidate_info} \
                        #job_description: {job_description}'

    cl_text_worker = TextWorker(
        client=client,
        model_id=model_id,
        sys_prompt=cl_system_prompt,
        user_prompt=cl_user_prompt,
    )
    return cl_text_worker.get_output()
# print(new_cl)

## Create the Candidate-Record 

In [None]:
# Existing cover letter text; stored on the candidate record as `cl`.
# NOTE: the backslash continuations keep each continued line's leading spaces
# inside the string.
old_cl = "Hello Hiring Team, \nI am pursuing my Ph.D. in the Computer Engineering department at Stevens Institute of Technology; \
            my research is focused on Natural Language Processing. \
            I work on applications related to the areas of Misinformation Research and Investigative Journalism using Evidence Extraction, \
            Fact Verification and Key-Phrase analysis; I research and work with Generative Models, Deep Learning models, Attention Networks, \
            and Large Language Models for Extraction and Verification Tasks. \
            My Dissertation Project is based on Fact-Verification and Evidence-Extraction using an Explainable Prompt-Engineering Model \
            I proposed. \nEven though I chose the use cases of NLP as the focus of my research, \
            my work is more comprehensive than NLP-based models; \
            I have also created a low-cost Derivative-Free Optimization Method for parameter optimization in ML; \
            I am working on obtaining a Provisional Patent for this research. \
            I am immensely passionate about learning and analyzing the underlying Mathematical Concepts of Machine Learning Models; \
            I like educating myself on all the specifics of any problem statement, following the motivation of the research, \
            chronological evolution of the Problem-Solution framework, and feature analysis from historical to the SOTA models. \
            \nI am currently seeking a job opportunity to hone my skills and work with a proficient team for an inspirational \
            cause where AI can be efficacious. \nRegards, \nHarish Sista, Ph.D. candidate, \nComputer Engineering Department, \
            \nStevens Institute of Technology."

# Build the candidate record from the parsed resume and the existing cover letter.
record = Record(resume=parsed_resume, cl=old_cl)
# Read the full job posting text from disk.
with open('job_description.txt', 'r') as f:
    job_description = f.read()

## Analyze missing information using Resume screener

In [None]:
# Screen the resume and cover letter stored on the record against the job posting.
key_questions = screen_resume_CL(record.resume, record.cl, job_description)
print(key_questions)

## Add additional information generation

In [None]:
# Wrap the screener output in a QA_Knowledge container.
# NOTE(review): the meaning of the integer arguments (0 here, 2 below) is not
# visible in this notebook — presumably an index/round id; confirm against QA_Knowledge.
add_knowledge = QA_Knowledge(0, key_questions)

# Add custom keywords and questions, using the same {"keywords": [...]} shape
# that the screener prompt requests.
key_q2 = {'keywords': 
          [{'keyword': 'data preprocessing and feature engineering', 
            'question': 'Can you explain how your resume reflects proficiency in data preprocessing and feature engineering techniques?, \
            particularly scikit-learn?'}, 
           {'keyword': 'collaborate with cross-functional teams to integrate AI solutions', 
            'question': "Can you provide examples from your resume that demonstrate your experience \
                  collaborating with cross-functional teams to integrate AI solutions?"} 
           ]}
add_knowledge.add_key_questions(2, key_q2)

# Add candidate answers as key-knowledge: maps each keyword to its question and
# the candidate's answer (None where no answer has been provided).
key_knowledge = {'5+ years of experience in working with AI/LLMs': 
                 {'question': 'How does the resume demonstrate at least 5 years of experience specifically working with \
                 AI and large language models (LLMs) in a direct capacity?',
                    'answer': 'As a part of my six years of PhD research in NLP, I have worked on various projects based on knowledge extraction from scholarly data and evidence extraction and fact verification of social-media data using web-scraped real-world data. I used LLMs for building all these projects.'
                 }, 
                 'data preprocessing and feature engineering':{ 
                'question': 'Can you explain how your resume reflects proficiency in data preprocessing and feature engineering techniques?',
                 'answer': 'In my PhD program, I have worked on different varieties of datasets like medical data(CORD-19, PubMed), \
                 social media data(CoVerifi), scholarly data(FEVER-dataset), and real-world data(AVeriTec). \
                 All these datasets use different feature spaces. To process these datasets \
                 I have used various feature engineering techniques like Prompt Engineering, Entity Extraction, key-word extraction, \
                 parts-of-speech tagging and text denoising using Python\'s RegEx library.'}, 
                 'Experience with big data tools and technologies (e.g., Hadoop, Spark)': { 
                  'question': 'How does the resume and cover letter demonstrate experience with big data tools and \
                  technologies such as Hadoop or Spark?',
                 'answer': None}, 
                 'strong understanding of machine learning algorithms and techniques':{
                  'question': 'How does your resume showcase a strong understanding of various machine learning \
                  algorithms and their techniques?',
                 'answer': None}, 
                 'collaborate with cross-functional teams to integrate AI solutions': { 
                  'question': 'Can you provide examples from your resume that demonstrate your experience \
                  collaborating with cross-functional teams to integrate AI solutions?',
                 'answer': 'In my previous job at \'Fresh Digital Group,\' I have collaborated with various teams like content writers \
                 and UI designers to brainstorm on building structure for chat applications and designing UI for mobile applications.'}}
# Debug output: prints the type of the literal above.
print(type(key_knowledge))
add_knowledge.add_knowledge(key_knowledge)
print(add_knowledge.get_knowledge())



## Add additional information to the candidate record

In [None]:
# Store the collected question/answer knowledge on the candidate record.
# NOTE(review): this assigns an instance attribute named `add_knowledge`; if
# Record defines a method with that name it would be shadowed — confirm.
record.add_knowledge=add_knowledge.get_knowledge()

# Get the candidate's updated record
print(record.get_record())

## Generate new cover letter using candidate record

In [None]:
# Read the job posting text from disk.
with open('job_description.txt', 'r') as f:
    job_description = f.read()
# Generate a cover letter from the full candidate record and the job posting.
new_cl = CL_generator(record.get_record(), job_description)
print(new_cl)

# TODO: let the user specify the desired word count of the cover letter.

## Generate new Resume

In [None]:
# Generate a job-specific resume from the full candidate record.
new_resume = generate_resume(record.get_record(), job_description)
print(new_resume)

### Re-run Screening

In [None]:
# Re-screen the regenerated documents. Pass the generated resume (`new_resume`)
# together with the new cover letter, so the screener evaluates what was just
# produced rather than the originally parsed resume held in `record.resume`.
key_questions = screen_resume_CL(new_resume, new_cl, job_description)
print(key_questions)

### Add more knowledge 

In [None]:
# Second round of candidate answers: maps each keyword to its question and the
# candidate's answer.
key_knowledge = {'large-scale LLMs in the 10s to 100s of billions of parameters': 
                 {'question': 'How does the candidate\'s experience with LLMs apply to large-scale models with billions \
                 of parameters, and is there any specific mention or evidence of working at this scale?',
                    'answer': 'In my Fact Verification Research, I have worked on GPT-4o(which has 1.7 Trillion parameters), \
                    LLAMA-3 8B, and MISTRAL 7B are close to 10B parameters. I have implemented these models on \
                    AWS EC2 and serverless environments like FireworksAI and LightningAI.' }, 
                 'specialized expertise in topics like fine-tuning, RLHF, LLM tool-use':{ 
                'question': 'Does the candidate demonstrate specialized expertise in RLHF \
                (Reinforcement Learning from Human Feedback) or other LLM tool-use techniques as outlined in the job description?',
                 'answer': 'In my Fact Verification research, I have worked on various prompt engineering techniques to reduce \
                 hallucination in LLMs. I also built a resume application that takes feedback from the candidate to optimize their profile.'}, 
                 'democratizing access to modern AI technology': { 
                  'question': 'How does the candidate intend to contribute \
                     to the democratization of AI technology, and what initiatives or projects have they been involved in that align with this goal?',
                 'answer': 'I am currently collaborating with a team of developers to convert my resume application to a web application and make it \
                 open-source for everyone on Git Hub.'}, 
                 }
# Debug output: prints the type of the literal above.
print(type(key_knowledge))
add_knowledge.add_knowledge(key_knowledge)
print(add_knowledge.get_knowledge())
# Merge the refreshed knowledge into what is already stored on the record
# (presumably a dict, given the .update call — confirm against QA_Knowledge.get_knowledge).
record.add_knowledge.update(add_knowledge.get_knowledge())