# Workbench for ResumeAssistant

In [1]:
from pdf2image import convert_from_path
# ImageWorker takes in the path to jpg resume image and returns the contents of the resume in a json format. 
from resumeassistant.operator.worker import ImageWorker, TextWorker
from resumeassistant.data.candidate_record import Record, QA_Knowledge
from openai import OpenAI
import json

# PDF to jpg image converter function
This function takes the input path for pdf-resume and output path for resume image. The output images are stored in 'jpg' format. 

In [2]:
def get_img(pdf_path, img_path):
      # Method for converting pdf resumes to jpg images. 
      images = convert_from_path(pdf_path)
      # print(type(images))
      for i in range(len(images)):
        
            # Save pages as images in the pdf
          images[i].save(img_path + str(i) +'.jpg', 'JPEG')

This is the code to extract OpenAI key stored in `gpt_key.json` file. \
Format: \
    ```{ 
	"api_key": \<open_ai_key\>
    }```

In [3]:
with open('gpt_key.json', 'r') as file:
      data = json.load(file)
      api_key = data["api_key"]

## Resume Parsing
This is the code for converting the PDF resume to an image and parsing it using "GPT-4o". 

In [4]:
client = OpenAI(api_key=api_key)
get_img('Resume_PhD.pdf', './Harish_Resume')

sys_prompt = "You are a resume parser. Given the image of the resume, parse every section and provide the output in a JSON format."
user_prompt="Can you parse all sections of this resume?"

image_worker = ImageWorker(client=client, model_id="gpt-4o-mini", sys_prompt=sys_prompt, user_prompt=user_prompt)
parsed_resume = image_worker.get_output('Harish_Resume0.jpg')
print(parsed_resume)

{'name': 'Harish Sista', 'contact': {'phone': '(201) 310-5683', 'email': 'hsista@stevens.edu', 'address': '2642 28th St, Apt 4F, Astoria, NY-11102'}, 'visa_status': 'No visa sponsorship is required', 'summary': 'Ph.D. Candidate researching in ML and NLP with 7+ years of experience working with ML models, Big Data, and Cloud Computing platforms and 2+ years of experience working in a professional environment. Spearheaded an Evidence Extraction and Fact Verification model using Prompt Engineering and tested it with various LLMs. Invented a Black Box Optimization model for online hyperparameter fine-tuning, significantly enhancing model performance in real-time applications.', 'skills': {'programming_languages': ['Python', 'SQL', 'NoSQL', 'C++', 'Java', 'HTML'], 'AI_tools': ['PyTorch', 'HuggingFace', 'TensorFlow', 'AutoML', 'SciKitLearn', 'NumPy'], 'LLMs': ['GPT', 'LLaMA', 'BLOOM', 'BERT'], 'data_engineering': ['AWS EC2', 'FireWorks AI', 'Lightning AI', 'Data Pipelines', 'Data Augmentatio

## Add CV and additional information
The current_cv instance keeps a record of the most recent updated Cover Letter and additional information on work experience, projects, achievements, and certifications. 

### Input arguments:
`resume`=Parsed resume (Dict). \
`cv` = Cover letter text. \
`add_skills` = Additional skills not mentioned in the resume (Dict). \
`add_work_experience` = Additional work experience not mentioned in the resume (Dict). \
`add_education` = Additional education not mentioned in the resume (Dict). \
`add_projects` = Additional Projects not mentioned in the resume (Dict). \
`add_achievements` = Additional achievements not mentioned in the resume (Dict). \
`add_certifications` = Additional certifications not mentioned in the resume (Dict). 

#### additional_information format:
```{``` \
```"information": text,``` \
```"organization": text,``` \
```"date": "dd/mm/yyyy - dd/mm/yyyy" (or) "dd/mm/yyyy"``` \
```}```

The job_listing instance keeps a record of the requirements and description of the job posting. 

In [5]:
print(parsed_resume.keys())

dict_keys(['name', 'contact', 'visa_status', 'summary', 'skills', 'education', 'research_experience', 'work_experience', 'certifications'])


## Resume Optimizer

In [6]:
def generate_resume(candidate_info, job_description):
    ro_system_prompt = 'You are a resume optimizer. \
    Given the candidate information(#candidate_info) and the job description(#job_description), \
    rewrite the current resume to highlight the relevant skills, experience, and quantifiable achievements that match the job description. \
    Provide the output in a JSON format.'
    
    ro_user_prompt = f'#candidate_info: {candidate_info} \
                    \n#job_description: {job_description}'
    
    ro_text_worker = TextWorker(client=client, model_id="gpt-4o-mini", sys_prompt=ro_system_prompt, user_prompt=ro_user_prompt, json_format=True)
    new_resume = ro_text_worker.get_output()
    return new_resume
# print(new_resume)

## Resume Screening
In this section, the generated resume is screened based on how informative it is w.r.t the job posting. 
The resume screener gathers the keywords from the job description where the new resume fails to address, and a question requesting the missing information to address the unfulfilled requirements. 


In [7]:
def screen_resume_CL(resume, cover_leter, job_description):
    ra_system_prompt = 'You are a resume and cover letter screener.\
                        given the current resume(#current_resume), cover letter(#current_cover_letter) and the \
                        job description(#job_description), \
                        provide keywords from the job description where the resume and the cover letter fail to address the requirements. \
                        Provide the keywords with a question describing how the resume and the cover letter fail to meet this requirement. \
                        Present the output in the following JSON output format: \
                        \n\t{\"keywords\": [{"keyword": system keyword, "question": system keyword question}]}'
    # ra_user_prompt = f'#current_resume: {record.resume} \
                        #job_description: {job_description}'
    
    ra_user_prompt = f'#current_resume: {resume} \
                        #job_description: {job_description} \
                        #current_cover_letter: {cover_leter}' 
    
    ra_text_worker = TextWorker(client=client, model_id="gpt-4o-mini", sys_prompt=ra_system_prompt, user_prompt=ra_user_prompt, json_format=True)
    system_insights = ra_text_worker.get_output()
    return system_insights
# print(system_insights)

## Cover Letter Generation
This is the code for generating the cover letter: 

In [8]:
def CL_generator(candidate_info, job_description):

    cl_system_prompt = "You are a cover letter generator, given the candidate information(#candidate_info) and job description(#job_description). \
                        Generate a cover letter relevant to the job description highlighting the candidate's skills and achievements"
    
    cl_user_prompt = f'#candidate_info: {candidate_info} \
                        #job_description: {job_description}'
    
    
    cl_text_worker = TextWorker(client=client, model_id="gpt-4o-mini", sys_prompt=cl_system_prompt, user_prompt=cl_user_prompt)
    new_cl = cl_text_worker.get_output()
    return new_cl
# print(new_cl)

## Add additional knowledge from 

In [9]:
old_cl = "Hello Hiring Team, \nI am pursuing my Ph.D. in the Computer Engineering department at Stevens Institute of Technology; \
            my research is focused on Natural Language Processing. \
            I work on applications related to the areas of Misinformation Research and Investigative Journalism using Evidence Extraction, \
            Fact Verification and Key-Phrase analysis; I research and work with Generative Models, Deep Learning models, Attention Networks, \
            and Large Language Models for Extraction and Verification Tasks. \
            My Dissertation Project is based on Fact-Verification and Evidence-Extraction using an Explainable Prompt-Engineering Model \
            I proposed. \nEven though I chose the use cases of NLP as the focus of my research, \
            my work is more comprehensive than NLP-based models; \
            I have also created a low-cost Derivative-Free Optimization Method for parameter optimization in ML; \
            I am working on obtaining a Provisional Patent for this research. \
            I am immensely passionate about learning and analyzing the underlying Mathematical Concepts of Machine Learning Models; \
            I like educating myself on all the specifics of any problem statement, following the motivation of the research, \
            chronological evolution of the Problem-Solution framework, and feature analysis from historical to the SOTA models. \
            \nI am currently seeking a job opportunity to hone my skills and work with a proficient team for an inspirational \
            cause where AI can be efficacious. \nRegards, \nHarish Sista, Ph.D. candidate, \nComputer Engineering Department, \
            \nStevens Institute of Technology."

record = Record(resume=parsed_resume, cl=old_cl)
# with open('job_description.txt', 'r') as f:
#     job_description = f.read()

job_description = "We are seeking a talented AI/ML Developer to join our innovative team. \
            The ideal candidate will have strong expertise in machine learning algorithms, \
            data preprocessing, model development, fine-tuning large language models (LLMs), \
            and working with big data. You will work on creating, deploying, \
            and optimizing AI models to solve real-world problems and enhance our products.\n \
            \nResponsibilities: \
            \n\nDesign and develop machine learning models. \
            \n\nPreprocess and analyze large datasets. \
            \nImplement and optimize algorithms for performance and scalability. \
            \nFine-tune large language models (LLMs) for specific applications. \
            \nManage and analyze big data to derive insights and improve models. \
            \nCollaborate with cross-functional teams to integrate AI solutions. \
            \nStay up-to-date with the latest advancements in AI and ML technologies. \
            \n\nRequirements: \
            \n5+ years of experience in working with AI/LLMs. \
            \nBachelor\'s or Master\'s degree in Computer Science, Data Science, or a related field. \
            \nProficiency in Python and ML frameworks such as TensorFlow, PyTorch, or scikit-learn. \
            \nExperience with data preprocessing and feature engineering. \
            \nStrong understanding of machine learning algorithms and techniques. \
            \nProven experience in fine-tuning large language models. \
            \nExperience with big data tools and technologies (e.g., Hadoop, Spark). \
            \nExcellent problem-solving and analytical skills. \
            \nWhat we offer: \
            \n\nCompetitive salary that reflects your skills, experience, and contributions to the company. \
            \nFlexible working hours and remote work options to support your personal and professional life. \
            \nFull reimbursement for business-related travel expenses for group meet-ups. \
            \nIndividual benefits and bonuses. \
            \nJoin us to work on cutting-edge AI projects and make a significant impact in a dynamic and collaborative environment!"

In [10]:
key_questions = screen_resume_CL(record.resume, record.cl, job_description)
print(key_questions)

{'keywords': [{'keyword': '5+ years of experience in working with AI/LLMs', 'question': 'How does your resume demonstrate that you have at least 5 years of experience working with AI/LLMs, especially considering your work experience mentions significantly less?'}, {'keyword': 'Experience with big data tools and technologies (e.g., Hadoop, Spark)', 'question': 'Can you clarify how your experience with big data tools aligns with the requirement for familiarity with tools like Hadoop or Spark, as these do not appear to be addressed in your resume or cover letter?'}, {'keyword': 'Proficiency in Python and ML frameworks such as TensorFlow, PyTorch, or scikit-learn', 'question': 'Your resume lists proficiency in Python and various AI tools, but how do you illustrate your proficiency specifically in TensorFlow, PyTorch, or scikit-learn?'}, {'keyword': 'Experience with data preprocessing and feature engineering', 'question': 'How does your experience with data preprocessing and feature enginee

In [13]:
add_knowledge = QA_Knowledge(0, key_questions)

key_q2 = {'keywords': 
          [{'keyword': 'data preprocessing and feature engineering', 
            'question': 'Can you explain how your resume reflects proficiency in data preprocessing and feature engineering techniques?, \
            particularly scikit-learn?'}, 
           {'keyword': 'collaborate with cross-functional teams to integrate AI solutions', 
            'question': "Can you provide examples from your resume that demonstrate your experience \
                  collaborating with cross-functional teams to integrate AI solutions?"} 
           ]}

add_knowledge.add_key_questions(2, key_q2)


key_knowledge = {'5+ years of experience in working with AI/LLMs': 
                 {'question': 'How does the resume demonstrate at least 5 years of experience specifically working with \
                 AI and large language models (LLMs) in a direct capacity?',
                    'answer': 'As a part of my six years of PhD research in NLP, I have worked on various projects based on knowledge extraction from scholarly data and evidence extraction and fact verification of social-media data using web-scraped real-world data. I used LLMs for building all these projects.'
                 }, 
                 'data preprocessing and feature engineering':{ 
                'question': 'Can you explain how your resume reflects proficiency in data preprocessing and feature engineering techniques?',
                 'answer': 'In my PhD program, I have worked on different varieties of datasets like medical data(CORD-19, PubMed), \
                 social media data(CoVerifi), scholarly data(FEVER-dataset), and real-world data(AVeriTec). \
                 All these datasets use different feature spaces. To process these datasets \
                 I have used various feature engineering techniques like Prompt Engineering, Entity Extraction, key-word extraction, \
                 parts-of-speech tagging and text denoising using Python\'s RegEx library.'}, 
                 'Experience with big data tools and technologies (e.g., Hadoop, Spark)': { 
                  'question': 'How does the resume and cover letter demonstrate experience with big data tools and \
                  technologies such as Hadoop or Spark?',
                 'answer': None}, 
                 'strong understanding of machine learning algorithms and techniques':{
                  'question': 'How does your resume showcase a strong understanding of various machine learning \
                  algorithms and their techniques?',
                 'answer': None}, 
                 'collaborate with cross-functional teams to integrate AI solutions': { 
                  'question': 'Can you provide examples from your resume that demonstrate your experience \
                  collaborating with cross-functional teams to integrate AI solutions?',
                 'answer': 'In my previous job at \'Fresh Digital Group,\' I have collaborated with various teams like content writers \
                 and UI designers to brainstorm on building structure for chat applications and designing UI for mobile applications.'}}
print(type(key_knowledge))
add_knowledge.add_knowledge(key_knowledge)
print(add_knowledge.get_knowledge())



<class 'dict'>
{'5+ years of experience in working with AI/LLMs': {'id': 0, 'answer': 'As a part of my six years of PhD research in NLP, I have worked on various projects based on knowledge extraction from scholarly data and evidence extraction and fact verification of social-media data using web-scraped real-world data. I used LLMs for building all these projects.'}, 'data preprocessing and feature engineering': {'id': 2, 'answer': "In my PhD program, I have worked on different varieties of datasets like medical data(CORD-19, PubMed),                  social media data(CoVerifi), scholarly data(FEVER-dataset), and real-world data(AVeriTec).                  All these datasets use different feature spaces. To process these datasets                  I have used various feature engineering techniques like Prompt Engineering, Entity Extraction, key-word extraction,                  parts-of-speech tagging and text denoising using Python's RegEx library."}, 'collaborate with cross-function

In [14]:
record.add_knowledge(add_knowledge.get_knowledge())

print(record.get_record())

TypeError: 'dict' object is not callable