In [39]:
# The imports

import os
from dotenv import load_dotenv
from agents import Agent, Runner, trace
from pydantic import BaseModel, EmailStr, Field
from typing import List, Optional

from pypdf import PdfReader
from IPython.display import Markdown, display

from openai import OpenAI
import json



In [25]:
load_dotenv(override=True)

True

In [26]:
openai_api_key = os.getenv('OPENAI_API_KEY')
if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
    

OpenAI API Key exists and begins sk-proj-


In [27]:
# Read the resume
reader = PdfReader("data/Saaniya_Desai_-_.pdf")

resume = ""
for page in reader.pages:
    text = page.extract_text()
    if text:
        resume += text


In [28]:
print(resume)

Saaniya Desai
github.com/saaniyadesai, saaniya.desai@gmail.com
E X P E R I E N C E 
May 2024 — Sep 2025 MSc Bioinformatics, The Hughes Group University of Glasgow
• Project title: "A Machine Learning Model for Virus Integration Site V alidation"
• Developed a machine learning model to validate viral integration sites in cancer genomes, enhancing 
detection accuracy for genomic research.
• Analyzed large-scale viral integration datasets to uncover host-virus interaction trends with potential 
therapeutic relevance.
May 2023 — Sep 2024 MSc Precision Medicine, The Le Quesne Group University of Glasgow
• Project title: "Artificial Intelligence in the Analysis of High-Resolution Mesothelioma Tissue Microarrays"
• T rained a self-supervised AI model to discover recurrent morphologies related to patient outcome
• Applied advanced image analysis to support translational oncology research.
Jan 2023 — May 2023 BSc Cell Biology Project, The Marston Lab University of Edinburgh 
• Project title: "C

In [41]:
# Create a pydentic model for the resume: include the following fields:
# - name
# - email
# - phone
# - linkedin
# - github
# - list of skills
# - list of experiences
# - list of education
# - list of projects
# - list of certifications
# - list of publications
# - list of patents
# - summary of resume in 2-3 sentences
# - list of all the relevant keywords that can be used to search jobs online

class Experience(BaseModel):
    title: str = Field(..., description="Job title or position held")
    company: Optional[str] = Field(None, description="Name of the company or organization")
    start_date: Optional[str] = Field(None, description="Start date of the experience")
    end_date: Optional[str] = Field(None, description="End date of the experience")
    description: Optional[str] = Field(None, description="Brief description of responsibilities and achievements")

class Education(BaseModel):
    degree: str = Field(..., description="Degree or qualification obtained")
    institution: Optional[str] = Field(None, description="Name of the educational institution")
    start_date: Optional[str] = Field(None, description="Start date of the education")
    end_date: Optional[str] = Field(None, description="End date of the education")
    description: Optional[str] = Field(None, description="Brief description of coursework or achievements")

class Project(BaseModel):
    name: str = Field(..., description="Name of the project")
    description: Optional[str] = Field(None, description="Brief description of the project")
    link: Optional[str] = Field(None, description="URL or link to the project")

class Certification(BaseModel):
    name: str = Field(..., description="Name of the certification")
    issuer: Optional[str] = Field(None, description="Issuing organization or authority")
    date: Optional[str] = Field(None, description="Date the certification was obtained")

class Publication(BaseModel):
    title: str = Field(..., description="Title of the publication")
    publisher: Optional[str] = Field(None, description="Publisher or journal name")
    date: Optional[str] = Field(None, description="Date of publication")
    link: Optional[str] = Field(None, description="URL or link to the publication")

class Patent(BaseModel):
    title: str = Field(..., description="Title of the patent")
    number: Optional[str] = Field(None, description="Patent number")
    date: Optional[str] = Field(None, description="Date the patent was granted")
    description: Optional[str] = Field(None, description="Brief description of the patent")

class ResumeModel(BaseModel):
    name: str = Field(..., description="Full name of the candidate")
    email: EmailStr = Field(..., description="Email address of the candidate")
    phone: Optional[str] = Field(None, description="Phone number of the candidate")
    linkedin: Optional[str] = Field(None, description="LinkedIn profile URL")
    github: Optional[str] = Field(None, description="GitHub profile URL")
    skills: List[str] = Field(default_factory=list, description="List of skills")
    experiences: List[Experience] = Field(default_factory=list, description="List of professional experiences")
    education: List[Education] = Field(default_factory=list, description="List of educational qualifications")
    projects: List[Project] = Field(default_factory=list, description="List of projects")
    certifications: List[Certification] = Field(default_factory=list, description="List of certifications")
    publications: List[Publication] = Field(default_factory=list, description="List of publications")
    patents: List[Patent] = Field(default_factory=list, description="List of patents")
    summary: Optional[str] = Field(None, description="Summary of the resume in 2-3 sentences")
    keywords: List[str] = Field(default_factory=list, description="List of relevant keywords for job search")
    target_company_profile: Optional[str] = Field(None, description="Profile/sector of the target company used for job search")


In [30]:
MODEL = "gpt-5-mini"

In [42]:
def system_prompt_4_resume_profiler():
    return f"""
    You are ResumeProfiler, an expert recruiting copilot.

    GOAL
    - Read the included resume of the candidate (plain text extracted from a PDF).
    - Produce resume data into a structured format that conforms to the JSON schema and can be used for job search.
      - Resume data conforms to the following JSON schema represented by the Pydantic model ResumeModel:
        {ResumeModel.model_json_schema()}

    RULES
    - SUMMARY: It is IMPORTANT to produce a concise but accurate hiring-manager summary (5–7 bullet points) of the candidate's resume.
    - KEYWORDS: It is IMPORTANT to produce a list of Boolean search strings that would retrieve relevant roles (AND/OR/quotes/site filters).
    - Never hallucinate - prefer nulls over guesses.

    OUTPUT
    - Must strictly follow the provided JSON Schema (no extra fields). Please produce only the json scheme, nothing else.
    """

print(system_prompt_4_resume_profiler())


    You are ResumeProfiler, an expert recruiting copilot.

    GOAL
    - Read the included resume of the candidate (plain text extracted from a PDF).
    - Produce resume data into a structured format that conforms to the JSON schema and can be used for job search.
      - Resume data conforms to the following JSON schema represented by the Pydantic model ResumeModel:
        {'$defs': {'Certification': {'properties': {'name': {'description': 'Name of the certification', 'title': 'Name', 'type': 'string'}, 'issuer': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'default': None, 'description': 'Issuing organization or authority', 'title': 'Issuer'}, 'date': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'default': None, 'description': 'Date the certification was obtained', 'title': 'Date'}}, 'required': ['name'], 'title': 'Certification', 'type': 'object'}, 'Education': {'properties': {'degree': {'description': 'Degree or qualification obtained', 'title': 'Degree', 'type': 'str

In [32]:
def user_prompt_4_resume_profiler(resume_text):
    return f"""
    Here is the candidate resume text extracted from PDF.
    Analyze ONLY content between the following markers: --- RESUME START --- and --- RESUME END ---.
    --- RESUME START ---
    {resume_text}
    --- RESUME END ---
    """

print(user_prompt_4_resume_profiler(resume))


    Here is the candidate resume text extracted from PDF.
    Analyze ONLY content between the following markers: --- RESUME START --- and --- RESUME END ---.
    --- RESUME START ---
    Saaniya Desai
github.com/saaniyadesai, saaniya.desai@gmail.com
E X P E R I E N C E 
May 2024 — Sep 2025 MSc Bioinformatics, The Hughes Group University of Glasgow
• Project title: "A Machine Learning Model for Virus Integration Site V alidation"
• Developed a machine learning model to validate viral integration sites in cancer genomes, enhancing 
detection accuracy for genomic research.
• Analyzed large-scale viral integration datasets to uncover host-virus interaction trends with potential 
therapeutic relevance.
May 2023 — Sep 2024 MSc Precision Medicine, The Le Quesne Group University of Glasgow
• Project title: "Artificial Intelligence in the Analysis of High-Resolution Mesothelioma Tissue Microarrays"
• T rained a self-supervised AI model to discover recurrent morphologies related to patient out

In [43]:
def profile_resume(resume_text):
    openai = OpenAI()

    system_prompt = system_prompt_4_resume_profiler()
    user_prompt = user_prompt_4_resume_profiler(resume_text)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
    )
    return response.choices[0].message.content


In [44]:
# Use the profile_resume function to profile the resume
profiled_resume_data = profile_resume(resume)
print(profiled_resume_data)


{
  "name": "Saaniya Desai",
  "email": "saaniya.desai@gmail.com",
  "phone": null,
  "linkedin": null,
  "github": "https://github.com/saaniyadesai",
  "skills": [
    "Python",
    "R",
    "SQL",
    "Bash",
    "Git",
    "Docker",
    "HPC environments",
    "Variant calling",
    "Alignment tools",
    "Genomic data visualization",
    "Single-cell transcriptomics",
    "Spatial transcriptomics",
    "Multi-omics integration",
    "Genomic databases (NCBI, UCSC Table Browser, COSMIC)",
    "Machine learning",
    "Self-supervised learning",
    "Unsupervised learning",
    "Workflow management (Snakemake, Nextflow)",
    "Containerization",
    "Version control"
  ],
  "experiences": [
    {
      "title": "MSc Bioinformatics Research (The Hughes Group)",
      "company": "University of Glasgow (The Hughes Group)",
      "start_date": "May 2024",
      "end_date": "Sep 2025",
      "description": "Developed a machine learning model to validate viral integration sites in cancer ge

In [45]:
# Use the profiled_resume_data to search for jobs using summary and keywords.
# Use OpenAI Agents to search for jobs using tools. Suggest 3-4 tools that can be used to search for jobs. 
# Let's suggest 3-4 tools that can be used to search for jobs using OpenAI Agents.
# These tools can be invoked by the agent to perform job search tasks based on the profiled_resume_data.

job_search_tools = [
    {
        "name": "SerpAPI",
        "description": "A tool to search for jobs using Google Search results. Useful for finding job postings from various sources.",
        "usage": "Search for jobs by title, location, and keywords extracted from the resume summary and keywords."
    },
    {
        "name": "LinkedIn Job Search API",
        "description": "A tool to search for jobs directly on LinkedIn. Useful for finding professional and network-based job opportunities.",
        "usage": "Query LinkedIn for jobs matching the candidate's skills, experience, and preferred locations."
    },
    {
        "name": "Indeed Job Search API",
        "description": "A tool to search for jobs on Indeed. Useful for aggregating job postings from multiple employers and industries.",
        "usage": "Use keywords and summary from the resume to find relevant job postings on Indeed."
    },
    {
        "name": "Glassdoor Job Search API",
        "description": "A tool to search for jobs and company reviews on Glassdoor. Useful for finding jobs and researching company culture.",
        "usage": "Search for jobs and review company ratings using the candidate's preferences."
    }
]

print("Suggested tools for job search using OpenAI Agents:")
for tool in job_search_tools:
    print(f"- {tool['name']}: {tool['description']}")

try:
    profiled_resume_json = json.loads(profiled_resume_data)
    resume_summary = profiled_resume_json.get("summary", "")
    resume_keywords = profiled_resume_json.get("keywords", [])
except (json.JSONDecodeError, AttributeError):
    resume_summary = ""
    resume_keywords = []

# Construct a prompt for the agent
agent_prompt = f"""
You are an AI job search assistant. Use the following tools to find relevant job postings for the candidate:

Tools:
{chr(10).join([f"- {tool['name']}: {tool['description']}" for tool in job_search_tools])}

Candidate Profile:
Summary: {resume_summary}
Keywords: {', '.join(resume_keywords)}

Instructions:
- Use the tools above to search for jobs that match the candidate's profile.
- Prioritize jobs that closely align with the summary and keywords.
- For each job found, provide the job title, company, location, and a brief description.
- If possible, include a link to the job posting.

Begin your search.
"""

print("\nAgent prompt for job search:")
print(agent_prompt)



Suggested tools for job search using OpenAI Agents:
- SerpAPI: A tool to search for jobs using Google Search results. Useful for finding job postings from various sources.
- LinkedIn Job Search API: A tool to search for jobs directly on LinkedIn. Useful for finding professional and network-based job opportunities.
- Indeed Job Search API: A tool to search for jobs on Indeed. Useful for aggregating job postings from multiple employers and industries.
- Glassdoor Job Search API: A tool to search for jobs and company reviews on Glassdoor. Useful for finding jobs and researching company culture.

Agent prompt for job search:

You are an AI job search assistant. Use the following tools to find relevant job postings for the candidate:

Tools:
- SerpAPI: A tool to search for jobs using Google Search results. Useful for finding job postings from various sources.
- LinkedIn Job Search API: A tool to search for jobs directly on LinkedIn. Useful for finding professional and network-based job oppo

In [37]:
# Create a pydentic model for the job search results:
# - job title
# - company
# - location
# - description
# - link
# - apply link  
# - Hiring manager name
# - list of all the relevant keywords that matched the job posting

class JobPosting(BaseModel):
    title: str = Field(..., description="Job title or position held")
    company: Optional[str] = Field(None, description="Name of the company or organization")
    location: Optional[str] = Field(None, description="Location of the job posting")
    description: Optional[str] = Field(None, description="Brief description of the job posting")
    link: Optional[str] = Field(None, description="URL or link to the job posting")
    apply_link: Optional[str] = Field(None, description="URL or link to the job application")
    hiring_manager_name: Optional[str] = Field(None, description="Name of the hiring manager")
    keywords: List[str] = Field(default_factory=list, description="List of relevant keywords for the job posting")


In [None]:
# Pydentic model for the job search plan
class JobSearchPlan(BaseModel):
    job_search_urls: List[str] = Field(default_factory=list, description="List of job search site URLs to be used for job search. Only return the URLs, no other text.")

# Get the job search planner agent
def get_job_search_planner_agent(profiled_resume_data_json):
    """
    Get the job search planner agent
    @param profiled_resume_data_json: The profiled resume data in JSON format
    @return: JSON object of the candidates profile
    """

    # Extract summary, keywords and target company profile from the profiled resume data
    resume_summary = profiled_resume_data_json.get("summary", "")
    resume_keywords = profiled_resume_data_json.get("keywords", [])
    company_profile = profiled_resume_data_json.get("target_company_profile", "")

    instructions = f"""
        You are an expert job search assistant. You are given a candidate's resume summary and keywords that represent the job search profile of the candidate and a target company profile.
        Your task is to plan a job search campaign by selecting the most relevant job search sites and creating a list of job search URLs.
        <SUMMARY>
        {resume_summary}
        </SUMMARY>
        <KEYWORDS>
        {', '.join(resume_keywords)}
        </KEYWORDS>
        <TARGET_COMPANY_PROFILE>
        {company_profile}
        </TARGET_COMPANY_PROFILE>

        Return the list of job search URLs in a JSON array format that can be used by the job_search_agent to search for jobs.
    """

    job_search_planner_agent = Agent(
        name="JobSearchPlannerAgent",
        instructions=instructions,
        model="gpt-41-mini",
        output_type=JobSearchPlan,
    )

    return job_search_planner_agent

