In [1]:
import json
import os
import urllib
import urllib.request
from typing import List

import openai

from retry import retry
from IPython.display import Markdown

from llama_index.core import SimpleDirectoryReader
from llama_index.core.tools import FunctionTool
from llama_index.core.schema import Document, MetadataMode
from llama_index.core.indices import SummaryIndex
from llama_index.core.text_splitter import SentenceSplitter
from llama_index.core.prompts import PromptTemplate
from llama_index.llms.openai import OpenAI
from llama_index.agent.openai import OpenAIAgent

from dotenv import load_dotenv
load_dotenv()

from config import DATA_DIR

# CV Uploader

In [2]:
# Location of the sample resume PDF inside the configured data directory.
CV_FILE_PATH = os.path.join(DATA_DIR, "sample_cv.pdf")

# Load the resume through LlamaIndex's directory reader, restricted to that
# single file; the result is kept in `cv_data` for the critique cells below.
cv_data = SimpleDirectoryReader(input_files=[CV_FILE_PATH]).load_data()

In [3]:
def combine_documents(
    pages: List[Document],
    separator: str = ""
) -> str:
    """Concatenate the content of several documents into one string.

    Args:
        pages: The documents to merge, in the order they should appear
            (for example the per-page documents stored in ``cv_data``).
        separator: Text inserted between consecutive documents. Defaults to
            the empty string, which reproduces plain back-to-back
            concatenation; pass e.g. ``"\\n\\n"`` to keep page boundaries
            visible in the combined text.

    Returns:
        The joined content of every document; an empty string when ``pages``
        is empty.
    """
    # Content is requested with MetadataMode.LLM, the same mode for every page.
    # str.join builds the result in one pass instead of repeated `+=` copies.
    return separator.join(
        page.get_content(metadata_mode=MetadataMode.LLM) for page in pages
    )

In [29]:
# Model clients shared by the cells below.
# LLM drives the job-description agent.
LLM = OpenAI(model="gpt-4o", max_tokens=4096)
# EXTRACTION_LLM answers the query that pulls the job posting out of raw page text.
EXTRACTION_LLM = OpenAI(model="gpt-4o-mini", max_tokens=4096)
# CRITIQUE_LLM produces the content critique of the resume.
# NOTE(review): this client is configured with `max_completion_tokens` rather
# than `max_tokens` like the two above - confirm the installed OpenAI wrapper
# forwards this keyword as intended.
CRITIQUE_LLM = OpenAI(model="o1-preview", max_completion_tokens=40000)

# JD Extractor

In [27]:
@retry(tries=5)
def extract_url(
    url: str,
    timeout: float = 30.0
) -> str:
    """Download ``url`` and return the response body decoded to text.

    Args:
        url: Address of the resource to fetch.
        timeout: Seconds to wait on blocking network operations before the
            attempt fails, so a stalled server cannot hang the notebook.

    Returns:
        The response body as a string. The charset declared in the response's
        Content-Type header is used for decoding; UTF-8 is used when none is
        declared or when the declared one is unknown to Python.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid in the chosen charset.
    """
    # The context manager closes the response (and its socket) even when
    # reading fails, which the plain urlopen()/read() sequence did not.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        raw_body = response.read()
        charset = response.headers.get_content_charset() or "utf-8"

    try:
        return raw_body.decode(charset)
    except LookupError:
        # The server advertised an encoding name Python does not recognise.
        return raw_body.decode("utf-8")

def extract_job_description_from_url(
    url: str
):
    """
    Use this function to extract the job description from the url
    """
    # NOTE: this function is wrapped with FunctionTool.from_defaults further
    # down, so its signature and the docstring above are presumably what the
    # agent sees as the tool description - they are deliberately left unchanged.
    #
    # Steps: download the page, index its raw text, then ask the extraction
    # model to pull the job information out of it. Returns the model's answer
    # as text.

    # NOTE(review): chunk size looks chosen so the page stays in a single
    # chunk where possible - confirm against the extraction model's context.
    MAX_CHUNK_SIZE = 128000
    sentence_splitter = SentenceSplitter(chunk_size=MAX_CHUNK_SIZE)

    url_content = extract_url(url)
    jd_index = SummaryIndex.from_documents(
        documents=[Document(text=url_content)],
        # Pass the splitter component itself rather than its bound
        # `get_nodes_from_documents` method: `transformations` expects
        # transform components, and the bound method only worked by being
        # callable.
        transformations=[sentence_splitter],
    )

    jd_extractor_query_engine = jd_index.as_query_engine(llm=EXTRACTION_LLM)

    jd = jd_extractor_query_engine.query("Extract Job Information from the HTML text given under context. Return empty string if there is no job description found from the url")
    return jd.response

In [30]:
# System prompt for the job-description agent: it tells the model to follow
# relevant URLs with the extraction tool and to append only relevant content.
JD_DESCRIPTION_SYSTEM_PROMPT = """You are an HR specialist with expertise in building effective resumes.
You will be given a job description in text, which may contain URL links to the job description and requirements.
If there are URLs relevant to describing job description and requirements, use the extraction tool to extract the information.
Append the relevant information collected from the URLs to the original job description only if the content extracted are relevant.

If there is no relevant information, return 'Please provide a valid job description in as text or URL link'
"""

# Wrap the URL extraction helper so the agent can call it as a tool.
jd_extraction_tool = FunctionTool.from_defaults(
    fn=extract_job_description_from_url
)

# Agent built on the main chat model, given the single extraction tool and
# the system prompt above.
jd_extraction_agent = OpenAIAgent.from_tools(
    [jd_extraction_tool],
    llm=LLM,
    system_prompt=JD_DESCRIPTION_SYSTEM_PROMPT
)

In [31]:
# Smoke-test the agent with a bare job title that contains no URL.
jd_response = jd_extraction_agent.chat("Data Scientist")

# Content Critique

In [32]:
# Prompt for the content critique. `{resume}` and `{job_description}` are
# placeholders filled in by the `.format(...)` call at the end of this cell.
CV_CRITIQUE_PROMPT = """You are an HR specialist with expertise in building effective resumes.
You will be given a job description and a resume. Based on the job description, critique the resume by focusing on the following:

- How well the resume highlights the required skills and qualifications.
- Areas where the resume could better align with the job description.
- Suggestions for enhancing the structure, formatting, or presentation.
- Any missing or underemphasized experiences or accomplishments that could strengthen the resume.
- Also analyse if there are unnecessary content which does not provide values to the resume with respect to the job description.

Be specific in your feedback and suggest actionable improvements. Also consider the job level of the resume and the job description. If you think that the resume is not suitable for the job, please explain why.

<START OF RESUME>
{resume}
</END OF RESUME>

<START OF JOB DESCRIPTION>
{job_description}
</END OF JOB DESCRIPTION>
"""


# Template object used to substitute the two placeholders.
CV_CRITIQUE_PROMPT_TEMPLATE = PromptTemplate(CV_CRITIQUE_PROMPT)

# Fetch and extract the job posting directly (bypassing the agent); `jd` is
# reused by the layout-critique messages later in the notebook.
jd = extract_job_description_from_url(
    "https://www.grab.careers/en/jobs/744000010322415/senior-data-scientist/"
)

# Ask the critique model to review the combined resume text against the
# extracted job description.
response = CRITIQUE_LLM.complete(
    CV_CRITIQUE_PROMPT_TEMPLATE.format(
        resume=combine_documents(cv_data),
        job_description=jd
    )
)

In [33]:
# Display the extracted job description to check the extraction result.
jd

"**Job Title:** Senior Data Scientist  \n**Location:** Singapore, Singapore  \n**Contract Type:** Full-time  \n\n**Job Description:**  \nGet to Know the Team  \nOur Data Science team develops creative and scalable machine learning algorithms in Natural Language Processing (NLP) and Computer Vision, which are critical to Grab's business.\n\nGet to Know the Role  \nAs a Senior Data Scientist, you will report into the Senior Data Science Manager and be based onsite in Singapore. You will dive deep into big datasets, develop efficient algorithms, and deploy solutions. Your work will directly impact our business operations and drive innovation.\n\n**Critical Tasks You Will Perform:**  \n- Prepare large datasets for model building and training.  \n- Develop efficient and scalable deep learning algorithms.  \n- Evaluate algorithm performance on text and image datasets.  \n- Deploy machine learning solutions to production platforms.  \n- Stay updated with the latest research in Large Language 

# Layout Critique

In [44]:
from llama_index.multi_modal_llms.openai import OpenAIMultiModal
from llama_index.core.schema import ImageDocument
from llama_index.core.prompts import ChatMessage, MessageRole
from llama_index.multi_modal_llms.openai.utils import generate_openai_multi_modal_chat_message
from pdf2image import convert_from_path
from PIL import Image
import base64
import io

# Rasterise the resume PDF into PIL images at 300 DPI for the vision model.
cv_images = convert_from_path(CV_FILE_PATH, dpi=300)

# Multi-modal client used for the layout critique.
# NOTE(review): max_new_tokens=1028 may be a typo for 1024 - confirm.
multi_model_llm = OpenAIMultiModal(
    model="gpt-4o",
    max_new_tokens=1028,
    temperature=0.2,
    image_detail="high"
)

In [34]:
def convert_PIL_to_base64(image: "Image.Image") -> str:
    """Serialise an image as PNG and return the bytes base64-encoded.

    Args:
        image: Object exposing ``save(fp, format=...)``; in this notebook a
            PIL image. The annotation is a forward-reference string naming the
            ``Image.Image`` class (``Image`` alone is the PIL module).

    Returns:
        The PNG bytes encoded as a base64 string (decoded from UTF-8).
    """
    # Using the buffer as a context manager releases it as soon as the bytes
    # have been copied out.
    with io.BytesIO() as buffer:
        image.save(buffer, format="PNG")
        # getvalue() returns the full contents regardless of the current
        # stream position, so no seek(0) is needed beforehand.
        png_bytes = buffer.getvalue()

    return base64.b64encode(png_bytes).decode("utf-8")

In [35]:
# One ImageDocument per rendered resume page, each carrying the page as a
# base64-encoded PNG.
image_documents = [
    ImageDocument(image=convert_PIL_to_base64(page_image))
    for page_image in cv_images
]

In [59]:
# System prompt for the layout critique: lists the aesthetic criteria the
# vision model should assess on the resume page images.
CV_LAYOUT_CRITIQUE_SYSTEM_PROMPT = """You are an honest and reliable HR specialist with expertise in building effective resumes.
You are not afraid to constructively comment on the weak aspects of the resume. Be honest, do not make up information.
You will be given a resume and optionally a job description. Your task is to critique the aesthetic aspects of the resume by focusing on the following:

1. Layout and Structure:
- Clarity and Organization: Use a clear, logical structure with well-defined sections.
- White Space: Incorporate sufficient white space between sections to avoid clutter and make the document breathable.
- Alignment: Maintain consistent alignment for headings, bullet points, and text blocks.
- Margins: Keep balanced margins for a clean and organized look.

2. Font choice, size and consistency:  Use professional, easy-to-read fonts, appropriate font size for headings and text contents. Use the same font throughout, reserving bold and italics for emphasis. Use font size, bolding, and spacing to guide the reader’s eye through the resume, ensuring the most important information (e.g., your name, job titles) stands out.

3. Color Scheme: Choice of color for texts, highlights, headings, etc.

4. Visual Elements: The usage appropriateness of icons or graphics. Design suitability for the job description and professionalism.

5. Use of Bold and Italics to highlight important information such as names, job titles, section headers

6. Consistency: Consistent formatting for dates, locations, and bullet points across different sections. Keep equal spacing between headings and paragraphs to ensure readability.

7. Length and Page Breaks: Whether the resume's length is appropriate. Are page breaks clean between sections, without splitting information awkwardly across pages.

8. Scannability:
Bullet Points: Use bullet points to break up large blocks of text, making it easy for the recruiter to scan.
Short Sentences: Keep sentences concise to improve readability.

Be specific in your feedback. If possible, suggest actionable improvements, only if the improvements have not been done by the original resume.
"""

In [60]:
# Conversation for the layout critique: the critique instructions, the job
# description as extra system context, then a user turn holding the resume
# page images.
messages = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content=CV_LAYOUT_CRITIQUE_SYSTEM_PROMPT,
    ),
    ChatMessage(
        role=MessageRole.SYSTEM,
        content=f"# Job description:\n\n{jd}",
    ),
    generate_openai_multi_modal_chat_message(
        prompt="resume",
        role="user",
        image_documents=image_documents,
        image_detail="high",
    ),
]


In [61]:
# Send the layout-critique conversation to the multi-modal model.
# Note: this rebinds `response`, replacing the content-critique result above.
response = multi_model_llm.chat(messages)

In [63]:
# Show the text of the model's layout critique.
response.message.content

'### Resume Critique\n\n**1. Layout and Structure:**\n- **Clarity and Organization:** The resume is well-organized with clear sections for Work Experience, Technical Skills, Personal Projects, Education, and Others. Each section is clearly labeled, making it easy to navigate.\n- **White Space:** There is a good amount of white space between sections, which helps to avoid clutter and makes the document breathable.\n- **Alignment:** The alignment is consistent throughout the document. However, the alignment of dates could be improved to ensure they are all right-aligned for better readability.\n- **Margins:** The margins are balanced, providing a clean and organized look.\n\n**2. Font Choice, Size, and Consistency:**\n- **Font Choice:** The font choice is professional and easy to read.\n- **Font Size:** The font size is appropriate for both headings and text content.\n- **Consistency:** The font is consistent throughout the resume. However, the use of bold for job titles and company name

# Workflow to combine Layout and Content Critiques

# Chatbot

In [4]:
# Chat model for the chatbot experiment below.
CHATBOT_LLM = OpenAI(model="gpt-4o-mini", max_tokens=512, temperature=0.2)

In [5]:
from llama_index.core.prompts import ChatMessage, MessageRole

# Short scripted chat history used to try out the chatbot model: a system
# persona, one question/answer exchange, and a final user turn to reply to.
messages = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content="This is a conversation between a human and an AI. The AI is helpful, creative, clever, and very friendly.",
    ),
    ChatMessage(
        role=MessageRole.USER,
        content="What's 1 + 1?",
    ),
    ChatMessage(
        role=MessageRole.ASSISTANT,
        content="2",
    ),
    ChatMessage(
        role=MessageRole.USER,
        content="I am fine.",
    ),
]

In [6]:
# Ask the chatbot model for the next assistant turn of the scripted history.
response = CHATBOT_LLM.chat(messages)

In [9]:
# Show the text of the chatbot's reply.
response.message.content

"I'm glad to hear that! How's your day going?"