In [1]:
import json
import os
import urllib
import urllib.request

import openai
from IPython.display import Markdown
from retry import retry

from llama_index.core import SimpleDirectoryReader
from llama_index.core.tools import FunctionTool
from llama_index.core.schema import Document, MetadataMode
from llama_index.core.indices import SummaryIndex
from llama_index.core.text_splitter import SentenceSplitter
from llama_index.core.prompts import PromptTemplate
from llama_index.llms.openai import OpenAI
from llama_index.agent.openai import OpenAIAgent

from dotenv import load_dotenv
load_dotenv()

from config import DATA_DIR

# CV Uploader

In [2]:
# Path of the sample CV (a PDF under DATA_DIR); the same path is later rendered
# to page images for the layout critique.
CV_FILE_PATH = os.path.join(DATA_DIR, "sample_cv.pdf")

# Parse the PDF; the result is iterated document-by-document by combine_documents.
cv_data = SimpleDirectoryReader(input_files=[CV_FILE_PATH]).load_data()

In [3]:
def combine_documents(
    pages: "list[Document]"
) -> str:
    """Concatenate the content of every page into a single string.

    Args:
        pages: Documents to merge, in the order they should appear in the
            output (e.g. the list produced by ``SimpleDirectoryReader.load_data``).

    Returns:
        The pages' contents joined back-to-back with no separator; an empty
        string when ``pages`` is empty.
    """
    # Each page's content is requested with MetadataMode.LLM. Joining once
    # avoids re-copying the accumulated text for every additional page.
    return "".join(
        page.get_content(metadata_mode=MetadataMode.LLM) for page in pages
    )

In [4]:
# General-purpose client: drives the JD extraction agent and the final CV rewrite.
LLM = OpenAI(model="gpt-4o", max_tokens=4096)
# Client used by extract_job_description_from_url to pull the job description out of page text.
EXTRACTION_LLM = OpenAI(model="gpt-4o-mini", max_tokens=4096)
# Client used for the resume content critique.
# NOTE(review): the other clients pass `max_tokens`; confirm that this OpenAI
# wrapper honours a `max_completion_tokens` keyword rather than ignoring it.
CRITIQUE_LLM = OpenAI(model="o1-preview", max_completion_tokens=40000)

# JD Extractor

In [5]:
@retry(tries=5)
def extract_url(
    url: str
) -> str:
    """Download ``url`` and return the response body decoded as text.

    The function is wrapped with ``@retry(tries=5)``, so a failing download is
    attempted again by the decorator before the error reaches the caller.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a string.

    Raises:
        urllib.error.URLError: If the request fails (after the retries).
        UnicodeDecodeError: If the body cannot be decoded with the selected charset.
    """
    # The context manager closes the response even when read() raises.
    with urllib.request.urlopen(url) as response:
        raw_body = response.read()
        # Prefer the charset announced in the Content-Type header and fall back
        # to UTF-8 (the previously hard-coded value) when none is declared.
        charset = response.headers.get_content_charset() or "utf-8"
    return raw_body.decode(charset)

def extract_job_description_from_url(
    url: str
):
    """
    Use this function to extract the job description from the url
    """
    # NOTE: signature and docstring are kept verbatim on purpose. This function
    # is wrapped by FunctionTool.from_defaults(), which presumably exposes them
    # to the agent as the tool description -- confirm before rewording.

    # One very large chunk size so the page text is split as little as possible.
    max_chunk_size = 128000
    splitter = SentenceSplitter(chunk_size=max_chunk_size)

    # Download the page and index its raw text as a single document.
    page_text = extract_url(url)
    page_index = SummaryIndex.from_documents(
        documents=[Document(text=page_text)],
        transformations=[splitter.get_nodes_from_documents],
    )

    # Ask the extraction model to pull the job information out of the indexed text.
    query_engine = page_index.as_query_engine(llm=EXTRACTION_LLM)
    extraction = query_engine.query("Extract Job Information from the HTML text given under context. Return empty string if there is no job description found from the url")
    return extraction.response

In [6]:
# System prompt for the job-description agent: it receives free text that may
# contain URLs and is told to call the extraction tool on relevant links.
JD_DESCRIPTION_SYSTEM_PROMPT = """You are an HR specialist with expertise in building effective resumes.
You will be given a job description in text, which may contain URL links to the job description and requirements.
If there are URLs relevant to describing job description and requirements, use the extraction tool to extract the information.
Append the relevant information collected from the URLs to the original job description only if the content extracted are relevant.

If there is no relevant information, return 'Please provide a valid job description in as text or URL link'
"""

# Expose extract_job_description_from_url to the agent as a callable tool.
jd_extraction_tool = FunctionTool.from_defaults(
    fn=extract_job_description_from_url
)

# Agent built on the general-purpose LLM with the extraction tool as its only tool.
jd_extraction_agent = OpenAIAgent.from_tools(
    [jd_extraction_tool],
    llm=LLM,
    system_prompt=JD_DESCRIPTION_SYSTEM_PROMPT
)

In [31]:
# Smoke test of the agent with a bare job title (no URL, no description body).
jd_response = jd_extraction_agent.chat("Data Scientist")

# Content Critique

In [7]:
# Prompt for the content critique. It has two placeholders, {resume} and
# {job_description}, which are filled in through CV_CRITIQUE_PROMPT_TEMPLATE.
CV_CRITIQUE_PROMPT = """You are an HR specialist with expertise in building effective resumes.
You will be given a job description and a resume. Based on the job description, critique the resume by focusing on the following:

- How well the resume highlights the required skills and qualifications.
- Areas where the resume could better align with the job description.
- Suggestions for enhancing the structure, formatting, or presentation.
- Any missing or underemphasized experiences or accomplishments that could strengthen the resume.
- Also analyse if there are unnecessary content which does not provide values to the resume with respect to the job description.

Be specific in your feedback and suggest actionable improvements. Also consider the job level of the resume and the job description. If you think that the resume is not suitable for the job, please explain why.

<START OF RESUME>
{resume}
</END OF RESUME>

<START OF JOB DESCRIPTION>
{job_description}
</END OF JOB DESCRIPTION>
"""


CV_CRITIQUE_PROMPT_TEMPLATE = PromptTemplate(CV_CRITIQUE_PROMPT)

# Fetch the job description text from a hard-coded posting URL (network call).
# `jd` is reused by the layout critique and the CV refinement cells.
jd = extract_job_description_from_url(
    "https://www.grab.careers/en/jobs/744000010322415/senior-data-scientist/"
)

# Content critique: the whole CV text plus the job description in one prompt.
# NOTE(review): `response` is reassigned by several later cells.
response = CRITIQUE_LLM.complete(
    CV_CRITIQUE_PROMPT_TEMPLATE.format(
        resume=combine_documents(cv_data),
        job_description=jd
    )
)

# Layout Critique

In [8]:
from llama_index.multi_modal_llms.openai import OpenAIMultiModal
from llama_index.core.schema import ImageDocument
from llama_index.core.prompts import ChatMessage, MessageRole
from llama_index.multi_modal_llms.openai.utils import generate_openai_multi_modal_chat_message
from pdf2image import convert_from_path
from PIL import Image
import base64
import io

# Render the CV PDF to images at 300 dpi; each image is later PNG/base64-encoded
# and sent to the multimodal model.
cv_images = convert_from_path(CV_FILE_PATH, dpi=300)

# Multimodal client used for the layout critique.
# NOTE(review): max_new_tokens=1028 may have been meant as 1024 -- confirm.
multi_model_llm = OpenAIMultiModal(
    model="gpt-4o",
    max_new_tokens=1028,
    temperature=0.2,
    image_detail="high"
)

In [9]:
def convert_PIL_to_base64(image: "Image.Image") -> str:
    """Encode an image as the base64 text of its PNG bytes.

    Args:
        image: Image to serialise. Only its ``save(fp, format=...)`` method is
            used, so any PIL image works.

    Returns:
        Base64 string of the PNG-encoded image (plain base64, without any
        ``data:`` URI prefix).
    """
    # The in-memory buffer is released as soon as the bytes have been copied out.
    with io.BytesIO() as buffer:
        image.save(buffer, format="PNG")
        # getvalue() returns the full contents regardless of the stream
        # position, so no seek(0) is required first.
        png_bytes = buffer.getvalue()

    return base64.b64encode(png_bytes).decode("utf-8")

In [10]:
# One ImageDocument per rendered CV page, carrying the page as base64-encoded PNG.
image_documents = [ImageDocument(image=convert_PIL_to_base64(page_image)) for page_image in cv_images]

In [11]:
# System prompt for the layout critique: lists the eight visual aspects the
# multimodal model should assess on the rendered CV pages. It has no
# placeholders; the job description is supplied as a separate message.
CV_LAYOUT_CRITIQUE_SYSTEM_PROMPT = """You are an honest and reliable HR specialist with expertise in building effective resumes.
You are not afraid to constructively comment on the weak aspects of the resume. Be honest, do not make up information.
You will be given a resume and optionally a job description. Your task is to critique the aesthetic aspects of the resume by focusing on the following:

1. Layout and Structure:
- Clarity and Organization: Use a clear, logical structure with well-defined sections.
- White Space: Incorporate sufficient white space between sections to avoid clutter and make the document breathable.
- Alignment: Maintain consistent alignment for headings, bullet points, and text blocks.
- Margins: Keep balanced margins for a clean and organized look.

2. Font choice, size and consistency:  Use professional, easy-to-read fonts, appropriate font size for headings and text contents. Use the same font throughout, reserving bold and italics for emphasis. Use font size, bolding, and spacing to guide the reader’s eye through the resume, ensuring the most important information (e.g., your name, job titles) stands out.

3. Color Scheme: Choice of color for texts, highlights, headings, etc.

4. Visual Elements: The usage appropriateness of icons or graphics. Design suitability for the job description and professionalism.

5. Use of Bold and Italics to highlight important information such as names, job titles, section headers

6. Consistency: Consistent formatting for dates, locations, and bullet points across different sections. Keep equal spacing between headings and paragraphs to ensure readability.

7. Length and Page Breaks: Whether the resume's length is appropriate. Are page breaks clean between sections, without splitting information awkwardly across pages.

8. Scannability:
Bullet Points: Use bullet points to break up large blocks of text, making it easy for the recruiter to scan.
Short Sentences: Keep sentences concise to improve readability.

Be specific in your feedback. If possible, suggest actionable improvements, only if the improvements have not been done by the original resume.
"""

In [13]:
# Conversation for the layout critique:
#   1. system message with the critique instructions,
#   2. a second system message carrying the job description text,
#   3. a user message with the prompt "resume" and the CV page images attached.
# NOTE(review): the name `messages` is rebound in the Chatbot section further
# down; re-run this cell before re-running the layout critique.
messages = [
    ChatMessage(content=CV_LAYOUT_CRITIQUE_SYSTEM_PROMPT, role=MessageRole.SYSTEM),
    ChatMessage(content=f"# Job description:\n\n{jd}", role=MessageRole.SYSTEM),
    generate_openai_multi_modal_chat_message(
        prompt = "resume",
        role = "user",
        image_documents=image_documents,
        image_detail="high"
        )
]


In [61]:
# Run the layout critique synchronously with the multimodal model.
response = multi_model_llm.chat(messages)

In [63]:
# Display the text of the layout critique reply.
response.message.content

'### Resume Critique\n\n**1. Layout and Structure:**\n- **Clarity and Organization:** The resume is well-organized with clear sections for Work Experience, Technical Skills, Personal Projects, Education, and Others. Each section is clearly labeled, making it easy to navigate.\n- **White Space:** There is a good amount of white space between sections, which helps to avoid clutter and makes the document breathable.\n- **Alignment:** The alignment is consistent throughout the document. However, the alignment of dates could be improved to ensure they are all right-aligned for better readability.\n- **Margins:** The margins are balanced, providing a clean and organized look.\n\n**2. Font Choice, Size, and Consistency:**\n- **Font Choice:** The font choice is professional and easy to read.\n- **Font Size:** The font size is appropriate for both headings and text content.\n- **Consistency:** The font is consistent throughout the resume. However, the use of bold for job titles and company name

In [23]:
import asyncio
import nest_asyncio

# nest_asyncio patches asyncio so that asyncio.run() below can be called from
# an environment that presumably already has a running event loop (the
# notebook kernel).
nest_asyncio.apply()


async def combine_analysis():
    """Run the content critique and the layout critique concurrently.

    Both requests are started together with ``asyncio.gather`` instead of
    being awaited one after the other, so the total wait is roughly that of
    the slower request.

    Returns:
        tuple: ``(content_response, layout_response)`` -- the completion from
        ``CRITIQUE_LLM`` and the chat response from ``multi_model_llm``.
    """
    content_response, layout_response = await asyncio.gather(
        CRITIQUE_LLM.acomplete(
            CV_CRITIQUE_PROMPT_TEMPLATE.format(
                resume=combine_documents(cv_data),
                job_description=jd
            )
        ),
        multi_model_llm.achat(messages),
    )
    return content_response, layout_response


analysis = asyncio.run(combine_analysis())

In [25]:
# Same two requests as combine_analysis(), built as un-awaited coroutines so
# they can be gathered together.
tasks = [
    CRITIQUE_LLM.acomplete(
        CV_CRITIQUE_PROMPT_TEMPLATE.format(
            resume=combine_documents(cv_data),
            job_description=jd
        )
    ),
    multi_model_llm.achat(messages)
]

# Top-level await: only valid where the cell runs inside an event loop.
content_response, layout_response = await asyncio.gather(*tasks)

In [22]:
# Synchronous variant of the content critique (same prompt as the async cells).
content_response = CRITIQUE_LLM.complete(
    CV_CRITIQUE_PROMPT_TEMPLATE.format(
        resume=combine_documents(cv_data),
        job_description=jd
    )
)


In [21]:
# Synchronous variant of the layout critique; overwrites `response`.
response = multi_model_llm.chat(messages)

# Chatbot

In [4]:
# Client for the chatbot experiment: short replies (512 tokens) at low temperature.
CHATBOT_LLM = OpenAI(model="gpt-4o-mini", max_tokens=512, temperature=0.2)

In [5]:
from llama_index.core.prompts import ChatMessage, MessageRole

# Minimal chat history used to try out CHATBOT_LLM: one system message, one
# completed user/assistant exchange, and a final user turn awaiting a reply.
# NOTE(review): this rebinds `messages`, which earlier held the layout-critique
# conversation; any layout-critique cell run after this one sends this history.
messages = [
    ChatMessage(content="This is a conversation between a human and an AI. The AI is helpful, creative, clever, and very friendly.", role=MessageRole.SYSTEM),
    ChatMessage(content="What's 1 + 1?", role=MessageRole.USER),
    ChatMessage(content="2", role=MessageRole.ASSISTANT),
    ChatMessage(content="I am fine.", role=MessageRole.USER),
]

In [6]:
# Ask the chatbot model for the next assistant turn; overwrites `response`.
response = CHATBOT_LLM.chat(messages)

In [9]:
# Display the chatbot's reply text.
response.message.content

"I'm glad to hear that! How's your day going?"

# CV Refine and Render

In [27]:
# Hard-coded Markdown text of a resume content critique; it is interpolated
# into OVERALL_ANALYSIS and used as input for the CV refinement step.
# Presumably pasted from an earlier content-critique run -- TODO confirm.
# NOTE(review): the text names the candidate; review before sharing the notebook.
content_analysis_response = """\
### Overall Assessment

Le Quan's resume is quite strong, especially for a mid-level to senior Data Scientist role. It showcases a solid blend of technical skills, relevant work experience, and academic achievements. However, there are areas for improvement in terms of structure, formatting, and emphasis on key accomplishments.

### Highlights of Required Skills and Qualifications

**Strengths:**
- **Technical Skills:** The resume effectively lists a wide range of technical skills and tools, which are highly relevant for a Data Scientist role.
- **Work Experience:** The work experience section is detailed and demonstrates hands-on experience with advanced machine learning and data science projects.
- **Education:** The academic background is strong, with notable awards and a perfect GPA in a relevant master's program.

**Weaknesses:**
- **Soft Skills:** The resume lacks emphasis on soft skills such as teamwork, communication, and leadership, which are also important for senior roles.
- **Impact Metrics:** While some accomplishments include metrics (e.g., "reducing unnecessary scans expense by 15%"), not all achievements are quantified.

### Suggestions for Enhancing Structure, Formatting, and Presentation

**1. Contact Information:**
- **Current:** The contact information is clear but could be more concise.
- **Improvement:** Consider placing the contact information in a single line to save space.

**2. Professional Summary:**
- **Current:** Missing.
- **Improvement:** Add a professional summary at the top to provide a quick overview of your qualifications and career goals.

**3. Work Experience:**
- **Current:** Detailed but somewhat cluttered.
- **Improvement:** Use bullet points more effectively to separate different tasks and achievements. Ensure each bullet point starts with an action verb and is concise.

**4. Technical Skills:**
- **Current:** Comprehensive but could be better organized.
- **Improvement:** Group skills into categories (e.g., Programming Languages, Libraries, Platforms) for easier readability.

**5. Personal Projects:**
- **Current:** Detailed but could be overwhelming.
- **Improvement:** Summarize the most impactful projects and focus on those that are most relevant to the job you are applying for.

**6. Education:**
- **Current:** Well-presented.
- **Improvement:** No major changes needed, but consider adding relevant coursework or projects if space allows.

**7. Others:**
- **Current:** Brief but useful.
- **Improvement:** Consider adding any relevant certifications or professional memberships.

### Missing or Underemphasized Experiences or Accomplishments

**1. Leadership and Teamwork:**
- **Current:** Not explicitly mentioned.
- **Improvement:** Highlight any leadership roles or teamwork experiences, especially in collaborative projects or team management.

**2. Publications and Patents:**
- **Current:** Not mentioned.
- **Improvement:** If applicable, include any publications, patents, or contributions to open-source projects.

**3. Soft Skills:**
- **Current:** Not emphasized.
- **Improvement:** Mention soft skills such as problem-solving, communication, and adaptability, either in the professional summary or within job descriptions.

### Unnecessary Content

**1. Detailed Technical Descriptions:**
- **Current:** Some descriptions are overly technical and may not be easily understood by non-technical recruiters.
- **Improvement:** Simplify technical jargon where possible and focus on the impact of your work.

**2. Repetitive Information:**
- **Current:** Some skills and tools are mentioned multiple times.
- **Improvement:** Avoid redundancy by consolidating similar information.

### Actionable Improvements

1. **Add a Professional Summary:**
   - Example: "Data Scientist with over 5 years of experience in developing and deploying machine learning models in healthcare and industrial settings. Proven track record in optimizing processes and improving accuracy through advanced data analytics and AI techniques."

2. **Reorganize Technical Skills:**
   - Example:
     ```
     Technical Skills:
     - Programming Languages: Python, R, SQL
     - Libraries: Scikit-learn, TensorFlow, PyTorch
     - Platforms: Google Cloud, Docker, Kubernetes
     - Tools: Git, FastAPI, Langchain
     ```

3. **Quantify Achievements:**
   - Example: "Implemented a multi-modal RAG audit system to identify improper MRI scan orders, reducing unnecessary scans by 15% and saving radiologists 20 hours per month."

4. **Highlight Soft Skills and Leadership:**
   - Example: "Led a team of 5 data scientists in developing a multi-stage LLM copilot for recommending thyroid cancer treatments, achieving over 90% accuracy."

5. **Simplify Technical Descriptions:**
   - Example: "Developed a pipeline for handling various data types (texts, tables, images) to improve medical guideline retrieval."

By addressing these areas, Le Quan can create a more polished and impactful resume that effectively highlights his qualifications and readiness for a senior Data Scientist role."""

# Hard-coded Markdown text of a resume layout critique; it is interpolated into
# OVERALL_ANALYSIS alongside the content critique.
# Presumably pasted from an earlier layout-critique run -- TODO confirm.
layout_analysis_response = """\
### Resume Critique

#### 1. Layout and Structure:
- **Clarity and Organization:** The resume is well-organized with clearly defined sections such as Work Experience, Technical Skills, Personal Projects, Education, and Others. This structure makes it easy to navigate.
- **White Space:** There is a good amount of white space between sections, which helps in avoiding clutter and makes the document breathable.
- **Alignment:** The alignment is consistent throughout the document, with headings, bullet points, and text blocks properly aligned.
- **Margins:** The margins are balanced, contributing to a clean and organized look.

#### 2. Font Choice, Size, and Consistency:
- **Font Choice:** The font choice is professional and easy to read.
- **Font Size:** The font size for headings and text content is appropriate. However, the contact information at the top could be slightly larger for better visibility.
- **Consistency:** The font is consistent throughout the resume, with bold and italics used appropriately for emphasis.

#### 3. Color Scheme:
- The color scheme is minimalistic, primarily using black text on a white background with blue highlights for section headers. This is professional and easy on the eyes.

#### 4. Visual Elements:
- **Icons/Graphics:** There are no icons or graphics used, which is appropriate for a professional resume.
- **Design Suitability:** The design is suitable for the job description and maintains a high level of professionalism.

#### 5. Use of Bold and Italics:
- Bold is used effectively to highlight important information such as job titles, company names, and section headers. Italics are used sparingly, which is good.

#### 6. Consistency:
- The formatting for dates, locations, and bullet points is consistent across different sections. Equal spacing between headings and paragraphs ensures readability.

#### 7. Length and Page Breaks:
- The resume is two pages long, which is appropriate given the amount of experience and skills listed. Page breaks are clean, without splitting information awkwardly across pages.

#### 8. Scannability:
- **Bullet Points:** Bullet points are used effectively to break up large blocks of text, making it easy for the recruiter to scan.
- **Short Sentences:** Sentences are concise, improving readability.

### Actionable Improvements:
1. **Contact Information Visibility:** Increase the font size of the contact information at the top for better visibility.
2. **Consistency in Date Format:** Ensure that the date format is consistent throughout the resume. For example, use either "Apr 2023 – Current" or "Apr 2023 – Present" consistently.
3. **Section Headers:** Consider adding a bit more spacing above section headers to further distinguish them from the content above.
4. **Technical Skills Section:** The technical skills section could benefit from a bit more organization, perhaps by grouping similar skills together or using subheadings.

Overall, this is a strong resume with a professional layout and clear organization. The suggested improvements are minor and aimed at enhancing readability and consistency."""

# Single critique document: the content analysis followed by the layout
# analysis, each under its own top-level Markdown heading. It is passed as the
# `critique` value when the CV refinement prompt is formatted.
OVERALL_ANALYSIS = f"# Content Analysis\n{content_analysis_response}\n\n\n # Layout Analysis\n{layout_analysis_response}\n"

In [46]:
# Prompt for the refinement step: the model rewrites the resume in Markdown
# using the critique. Four placeholders must be supplied when formatting:
# {extra_instructions}, {resume}, {job_description} and {critique}
# (pass an empty string for {extra_instructions} when there are none).
#
# The opening `"""\` + newline only suppresses the leading line break. The
# previous `"""\You` was an invalid escape sequence that put a literal
# backslash in front of "You", and a stray "s" line sat between the job
# description and critique sections.
CV_REVIEW_PROMPT = """\
You are a senior career advisor. You are given an original resume, (optionally) a job description and a critique on the strengths and weaknesses of the resume.
Your task is to use the critique to improve the resume. The improved version should address the weak points of the resume and implement the recommendations as needed.
The output should only contain the improved resume, nothing else. The improved resume should be formatted in formatted Markdown format.

{extra_instructions}

<START OF RESUME>
{resume}
<END OF RESUME>

<START OF JOB DESCRIPTION>
{job_description}
<END OF JOB DESCRIPTION>

<START OF CRITIQUE>
{critique}
<END OF CRITIQUE>

IMPROVED RESUME:
"""

CV_REVIEW_PROMPT_TEMPLATE = PromptTemplate(CV_REVIEW_PROMPT)

In [36]:
# Produce the improved resume (Markdown text) from the CV, the job description
# and the combined critique.
# Every placeholder of the review prompt is passed explicitly: the prompt
# declares {extra_instructions}, and calling str.format() on the raw prompt
# without it raises KeyError. Formatting goes through the PromptTemplate, as
# the critique cells do.
# (The variable name keeps its original spelling so later references still work.)
editted_cv = LLM.complete(
    CV_REVIEW_PROMPT_TEMPLATE.format(
        extra_instructions="",
        resume=combine_documents(cv_data),
        job_description=jd,
        critique=OVERALL_ANALYSIS
    )
).text