In [None]:
# The imports

import os
from enum import Enum
from dotenv import load_dotenv
from agents import Agent, Runner, trace, WebSearchTool, gen_trace_id, function_tool
from agents.mcp import MCPServerStdio
from pydantic import BaseModel, EmailStr, Field
from typing import List, Optional

from pypdf import PdfReader
from IPython.display import Markdown, display

from openai import OpenAI
import json
import asyncio
from contextlib import AsyncExitStack


In [None]:
# Load variables from a .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
load_dotenv(override=True)

In [None]:
# Confirm that the API keys were loaded. Only a short prefix is echoed so the
# full secrets never land in the notebook output. A missing key is reported
# explicitly instead of being skipped silently, because later cells fail in
# less obvious ways without it.
openai_api_key = os.getenv('OPENAI_API_KEY')
if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set - define OPENAI_API_KEY before running the agents")

brave_api_key = os.getenv('BRAVE_API_KEY')
if brave_api_key:
    print(f"Brave API Key exists and begins {brave_api_key[:12]}")
else:
    print("Brave API Key not set - define BRAVE_API_KEY before running the job search")


In [None]:
# Model name used as the default for the agents defined in this notebook.
MODEL = "gpt-5-mini"
# Alternative model, kept commented out for quick switching:
#MODEL = "gpt-4.1-mini"

In [None]:
fetch_params = {"command": "uvx", "args": ["mcp-server-fetch"]}

async with MCPServerStdio(params=fetch_params, client_session_timeout_seconds=60) as server:
    fetch_tools = await server.list_tools()

print(fetch_tools)

In [None]:
# A plain loop: a list comprehension used only for printing builds a throwaway
# list of None values, which the notebook would also echo as the cell result.
for tool in fetch_tools:
    print(f"Name: {tool.name}\n\tDescription: {tool.description}\n")

In [None]:
data_files_path = os.path.abspath(os.path.join(os.getcwd(), "../data"))
files_params = {"command": "npx", "args": ["-y", "@modelcontextprotocol/server-filesystem", data_files_path]}

async with MCPServerStdio(params=files_params,client_session_timeout_seconds=60) as server:
    file_tools = await server.list_tools()


In [None]:
# Number of tools reported by the filesystem MCP server (shown as the cell result).
len(file_tools)

In [None]:
# Print the name and description of every filesystem MCP tool.
for tool in file_tools:
    entry = f"Name: {tool.name}\nDescription: {tool.description}\n"
    print(entry)

In [None]:
# Create a Pydantic model for the resume that includes the following fields:
# - name
# - email
# - phone
# - linkedin
# - github
# - list of skills
# - list of experiences
# - list of education
# - list of projects
# - list of certifications
# - list of publications
# - list of patents
# - summary of resume in 2-3 sentences
# - list of all the relevant keywords that can be used to search jobs online

# Seniority bands assigned to a candidate. The enum also subclasses str, so
# each member's value is a plain lowercase string.
# NOTE(review): the LEAD (15+) and EXECUTIVE (20+) bands overlap as written
# below - confirm which one is intended at 20+ years.
class ExperienceLevel(str, Enum):
    ENTRY = "entry" # 0-2 years of experience
    JUNIOR = "junior" # 2-5 years of experience
    MID = "mid" # 5-10 years of experience
    SENIOR = "senior" # 10-15 years of experience
    LEAD = "lead" # 15+ years of experience
    EXECUTIVE = "executive" # 20+ years of experience

# One professional experience entry. Only the title is required.
# (Comment rather than docstring: a docstring would be added to the JSON schema.)
class Experience(BaseModel):
    title: str = Field(description="Job title or position held")
    company: Optional[str] = Field(
        default=None, description="Name of the company or organization"
    )
    start_date: Optional[str] = Field(
        default=None, description="Start date of the experience"
    )
    end_date: Optional[str] = Field(
        default=None, description="End date of the experience"
    )
    description: Optional[str] = Field(
        default=None,
        description="Brief description of responsibilities and achievements",
    )

# One education entry. Only the degree is required.
# (Comment rather than docstring: a docstring would be added to the JSON schema.)
class Education(BaseModel):
    degree: str = Field(description="Degree or qualification obtained")
    institution: Optional[str] = Field(
        default=None, description="Name of the educational institution"
    )
    start_date: Optional[str] = Field(
        default=None, description="Start date of the education"
    )
    end_date: Optional[str] = Field(
        default=None, description="End date of the education"
    )
    description: Optional[str] = Field(
        default=None, description="Brief description of coursework or achievements"
    )

# One project entry. Only the name is required.
# (Comment rather than docstring: a docstring would be added to the JSON schema.)
class Project(BaseModel):
    name: str = Field(description="Name of the project")
    description: Optional[str] = Field(
        default=None, description="Brief description of the project"
    )
    link: Optional[str] = Field(
        default=None, description="URL or link to the project"
    )

# One certification entry. Only the name is required.
# (Comment rather than docstring: a docstring would be added to the JSON schema.)
class Certification(BaseModel):
    name: str = Field(description="Name of the certification")
    issuer: Optional[str] = Field(
        default=None, description="Issuing organization or authority"
    )
    date: Optional[str] = Field(
        default=None, description="Date the certification was obtained"
    )

# One publication entry. Only the title is required.
# (Comment rather than docstring: a docstring would be added to the JSON schema.)
class Publication(BaseModel):
    title: str = Field(description="Title of the publication")
    publisher: Optional[str] = Field(
        default=None, description="Publisher or journal name"
    )
    date: Optional[str] = Field(default=None, description="Date of publication")
    link: Optional[str] = Field(
        default=None, description="URL or link to the publication"
    )

# One patent entry. Only the title is required.
# (Comment rather than docstring: a docstring would be added to the JSON schema.)
class Patent(BaseModel):
    title: str = Field(description="Title of the patent")
    number: Optional[str] = Field(default=None, description="Patent number")
    date: Optional[str] = Field(
        default=None, description="Date the patent was granted"
    )
    description: Optional[str] = Field(
        default=None, description="Brief description of the patent"
    )

# Structured job profile extracted from a candidate's resume.
# Required fields: name and experience_level. Everything else is optional or
# defaults to an empty list.
# (Comment rather than docstring: a docstring would be added to the JSON schema
# that is embedded in the agent prompt.)
class ResumeModel(BaseModel):
    name: str = Field(..., description="Full name of the candidate")
    # Optional on purpose: a resume may not list an email address, and a
    # required, non-nullable field would force the model to invent one or fail
    # validation - contradicting the "prefer nulls over guesses" prompt rule.
    email: Optional[EmailStr] = Field(None, description="Email address of the candidate")
    phone: Optional[str] = Field(None, description="Phone number of the candidate")
    linkedin: Optional[str] = Field(None, description="LinkedIn profile URL")
    github: Optional[str] = Field(None, description="GitHub profile URL")
    skills: List[str] = Field(default_factory=list, description="List of skills")
    experiences: List[Experience] = Field(default_factory=list, description="List of professional experiences")
    education: List[Education] = Field(default_factory=list, description="List of educational qualifications")
    projects: List[Project] = Field(default_factory=list, description="List of projects")
    certifications: List[Certification] = Field(default_factory=list, description="List of certifications")
    publications: List[Publication] = Field(default_factory=list, description="List of publications")
    patents: List[Patent] = Field(default_factory=list, description="List of patents")
    experience_level: ExperienceLevel = Field(..., description="Experience level of the candidate based on number of years of experience")
    summary: Optional[str] = Field(None, description="Summary of the resume in 2-3 sentences")
    keywords: List[str] = Field(default_factory=list, description="List of relevant keywords for job search")
    target_company_profile: Optional[str] = Field(None, description="Profile/sector of the target company used for job search")


# Create a Pydantic model for the job search results with the following fields:
# - job title
# - company
# - location
# - description
# - link
# - apply link  
# - Hiring manager name
# - list of all the relevant keywords that matched the job posting

# One job posting found by the job search. Only the title is required.
# (Comment rather than docstring: a docstring would be added to the JSON schema.)
class JobPosting(BaseModel):
    title: str = Field(description="EXACT Job title or position held")
    company: Optional[str] = Field(
        default=None, description="EXACT Name of the company or organization"
    )
    location: Optional[str] = Field(
        default=None, description="EXACT Location of the job posting"
    )
    description: Optional[str] = Field(
        default=None, description="Brief description of the job posting"
    )
    link: Optional[str] = Field(
        default=None,
        description="URL or link to the job posting. (CRITICAL - this must be the actual job description page)",
    )
    apply_link: Optional[str] = Field(
        default=None,
        description="URL or link to the job application. (CRITICAL - this must be the actual application link)",
    )
    posted_date: Optional[str] = Field(
        default=None, description="Date the job posting was posted"
    )
    application_deadline: Optional[str] = Field(
        default=None, description="Date the job application deadline"
    )
    hiring_manager_name: Optional[str] = Field(
        default=None, description="Name of the hiring manager"
    )
    keywords: List[str] = Field(
        default_factory=list,
        description="List of relevant keywords for the job posting",
    )
    rating: Optional[float] = Field(
        default=None,
        description="Rating of the job posting from 0 to 10 based on the candidate's profile",
    )

# Wrapper model holding the job postings returned by the job search agent.
# (Comment rather than docstring: a docstring would be added to the JSON schema.)
class JobPostingList(BaseModel):
    job_postings: List[JobPosting] = Field(
        default_factory=list,
        description="List of job postings",
    )

In [None]:
@function_tool
async def read_resume(resume_file: str) -> str:
    """Read a PDF resume from the given file path and return its text.

    Args:
        resume_file: The path to the candidate's resume PDF file.

    Returns:
        The text of all pages that yielded text, joined with newlines.

    Raises:
        ValueError: If the file cannot be opened or parsed.
    """
    print(f"Reading resume from:\n {resume_file}")

    try:
        reader = PdfReader(resume_file)
        page_texts = [page.extract_text() for page in reader.pages]
    except Exception as e:
        print(f"Error reading resume from {resume_file}: {e}")
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError(f"Error reading resume from {resume_file}: {e}") from e

    # Skip pages without extractable text, and separate pages with a newline so
    # the last word of one page does not fuse with the first word of the next.
    resume = "\n".join(text for text in page_texts if text)

    print(f"resume from function_tool:\n {resume}")
    return resume

class ResumeProcessorAgent:
    """
    Builds and runs an agent that reads a candidate's PDF resume and returns a structured job profile.
    @param candidate_name: str - Name of the candidate; also the file name (without .pdf) of the resume under ../data
    @param model: str - The model to use for the agent
    """
    def __init__(self, candidate_name: str, model: str = MODEL):
        self.candidate_name = candidate_name
        self.model = model

    def get_system_prompt(self) -> str:
        """
        Build the agent instructions, including the JSON schema of ResumeModel.
        @return: str - The system prompt
        """
        # NOTE(review): the prompt asks for a 5-7 bullet summary while the embedded
        # schema describes `summary` as 2-3 sentences, and the LEAD (15+) and
        # EXECUTIVE (20+) bands overlap. The text is left unchanged here - confirm intent.
        return f"""
        You are a resume processor agent, an expert recruiting copilot responsible to review the resume and create a job profile for the candidate that can be used for job search.
            
        Your goal is to create a job profile for the candidate that can be used for job search.

        GOAL
        - You are given a file path to a resume. You need to use the `read_resume` tool to read the resume and extract the text.
        - Review the included resume of the candidate (plain text extracted from a PDF using the `read_resume` tool).
        - Produce resume data into a structured format that conforms to the JSON schema and can be used for job search.
        - Resume data conforms to the following JSON schema represented by the Pydantic model ResumeModel:
            {ResumeModel.model_json_schema()}

        RULES
        - SUMMARY: It is IMPORTANT to produce a concise but accurate hiring-manager summary (5–7 bullet points) of the candidate's resume.
        - KEYWORDS: It is IMPORTANT to produce a list of Boolean search strings that would retrieve relevant roles (AND/OR/quotes/site filters). Ignore the company names in the keywords.
        - ExperienceLevel - It is IMPORTANT to produce the experience level of the candidate based on the number of years of experience. Ignore the internship or fellowship experience.
        - ENTRY if the candidate is just graduating from college (check the education section) or has less than 2 years of experience, 
        - JUNIOR if the candidate has 2-5 years of experience, 
        - MID if the candidate has 5-10 years of experience, 
        - SENIOR if the candidate has 10-15 years of experience, 
        - LEAD if the candidate has 15+ years of experience, 
        -EXECUTIVE if the candidate has 20+ years of experience.
        - Never hallucinate - prefer nulls over guesses.

        OUTPUT
        - Must strictly follow the provided JSON Schema (no extra fields). Please produce only the json scheme, nothing else.
        """

    def get_resume_file_path(self) -> str:
        """
        Build the relative path of the candidate's resume PDF.
        @return: str - "../data/<candidate_name>.pdf"
        """
        return f"../data/{self.candidate_name}.pdf"

    def get_user_prompt(self) -> str:
        """
        Build the user message that tells the agent which resume file to read.
        @return: str - The user prompt
        """
        return f"""
        Use the text extracted from the PDF resume using the `read_resume` tool to create a job profile for the candidate that can be used for job search.
        read_resume function_tool is available to use and takes a single argument resume_file which is the path to the candidate's resume file. 
        The resume file path is: {self.get_resume_file_path()}
        """

    def get_resume_processor_agent(self) -> Agent:
        """
        Create the agent that profiles the resume.
        The agent is given the `read_resume` tool and must return a ResumeModel.
        @return: Agent - The agent that can be used to profile the resume
        """

        print("started profile_resume")

        system_prompt = self.get_system_prompt()
        print(f"system_prompt:\n {system_prompt}")

        resume_processor_agent = Agent(
            # Use the model chosen at construction time rather than the global default.
            model=self.model,
            name="ResumeProcessorAgent",
            instructions=system_prompt,
            tools=[read_resume],
            output_type=ResumeModel
        )

        return resume_processor_agent

    async def run_with_trace(self):
        """
        Run the resume processor agent inside a trace.
        @return: The agent's final output (a ResumeModel), or None if the run produced no result
        """
        trace_id = gen_trace_id()
        print(f"Trace@ https://platform.openai.com/traces/trace?trace_id={trace_id}")

        # The first positional argument of trace() is the workflow name; the id has
        # to be passed as trace_id, otherwise the printed link points at the wrong trace.
        with trace("Resume Processor", trace_id=trace_id):
            profiled_resume = await Runner.run(self.get_resume_processor_agent(), self.get_user_prompt())

        return profiled_resume.final_output if profiled_resume else None

In [None]:
profiled_resume = await ResumeProcessorAgent("Saaniya Desai", MODEL).run_with_trace()

print(f"profiled_resume:\n {profiled_resume}")

In [None]:
# Show the structured profile produced by the resume processor agent.
print(profiled_resume)


In [None]:
class JobSearcherAgent:
    """
    Agent to search for jobs based on the job profile.
    @param job_profile: ResumeModel - The job profile of the candidate
    @param model: str - The model to use for the agent
    """
    def __init__(self, job_profile: ResumeModel, model: str = MODEL):
        self.job_profile = job_profile
        self.model = model
        # Environment passed to the Brave search MCP server process.
        self.brave_env = {"BRAVE_API_KEY": os.getenv("BRAVE_API_KEY")}

    def get_system_prompt(self) -> str:
        """
        Build the agent instructions, including the JSON schema of JobPostingList.
        @return: str - The system prompt
        """
        return f"""
        You are an expert Job Search Agent specializing in finding active job opportunities that perfectly match a candidate's profile. 
        Your mission is to conduct comprehensive job searches across multiple platforms and extract detailed, actionable information about each opportunity.
        You are given a candidate's profile as a JSON object represented by the Pydantic model ResumeModel.
        Your task is to search AS MANY job search sites as possible using specified MCP servers for searching the web and 
        return a list of job postings as a JSON object represented by the Pydantic model JobPostingList.

        ## Core Responsibilities
        - Using Candidate's Profile, find the list of top 10 companies that the candidate would be most interested in working for. Search the job postings on their career pages.
        - Deep Job Search: Use all available MCP tools to search extensively across job boards, company websites, and professional platforms
        - Active Job Validation: Verify that job postings are current, active, and accepting applications
        - Detailed Information Extraction: Extract comprehensive job details including application URLs, requirements, and company information
        - Profile Matching: Ensure all found opportunities align with the candidate's experience level, skills, and target company profile

        ## Search Strategy 
        ### Primary Search Sources (Use MCP tools for each):
        - Major Job Boards: LinkedIn, Indeed, Glassdoor, ZipRecruiter, Monster
        - Tech-Specific Platforms: Stack Overflow Jobs, AngelList, Dice, GitHub Jobs
        - Company Career Pages: Direct searches on target company websites. 
        - Professional Networks: Remote work platforms, industry-specific job boards
        - Government/Public Sector: USAJobs, state government portals (if relevant)

        ### Search Methodology:
        - Keyword-Based Search: Use the candidate's keywords for initial broad searches
        - Title-Specific Search: Search for specific job titles mentioned in target roles
        - Company-Specific Search: Search directly on target companies' career pages
        - Location-Based Search: Consider remote, hybrid, and location preferences
        - Experience-Level Filtering: Filter by appropriate seniority level

        ## Job Filtering Rules:
        - Experience Level Match: Only include jobs appropriate for candidate's experience level
        - Skill Relevance: Prioritize jobs requiring 70%+ of candidate's skills
        - Company Culture Fit: Favor companies matching target company profile
        - Active Status: Only include jobs actively accepting applications
        - Application Accessibility: Ensure application process is clearly accessible

        ## Search Execution Instructions
        ### Step 1: Broad Discovery Search
        - Use MCP web search tools with candidate's primary keywords
        - Search major job boards with experience level filters
        - Cast a wide net initially to discover opportunities

        ### Step 2: Targeted Company Search
        - Search career pages of companies in target company profile
        - Use company names + "careers" or "jobs" in search queries
        - Look for new/recent postings on company websites

        ### Step 3: Deep Information Extraction
        - For each promising job found, fetch the complete job posting
        - Navigate to application pages to verify they're active
        - Extract all required information fields

        ### Step 4: Verification & Validation
        - Verify job posting dates are recent (within last 30 days)
        - Confirm application links are functional
        - Check for duplicate postings across platforms

        ## Format: JobPostingList JSON Schema
        - Return a structured JSON object containing:
            - job_postings: List[JobPosting] - List of job postings
            - JSON SHEMAM: {JobPostingList.model_json_schema()}
            
        RULES:
        1. IMPORTANT: You must use the candidate's target company profile (enclosed in <TARGET_COMPANY_PROFILE>...</TARGET_COMPANY_PROFILE> tags) to filter the job postings relevant to the candidate.
        2, IMPORTANT: You must use the candidate's keywords (enclosed in <KEYWORDS>...</KEYWORDS> tags) to filter the job postings relevant to the candidate.
        3, IMPORTANT: You must use the candidate's summary (enclosed in <SUMMARY>...</SUMMARY> tags) to filter the job postings relevant to the candidate.
        4. IMPORTANT: You must use the candidate's experience level (enclosed in <EXPERIENCE_LEVEL>...</EXPERIENCE_LEVEL> tags) to filter the job postings relevant to the candidate.
        5. Return only JSON object, no other text.
        """

    def get_user_prompt(self) -> str:
        """
        Build the user message carrying the candidate's summary, keywords,
        target company profile and experience level in tagged sections.
        @return: str - The user prompt
        """
        profile = self.job_profile

        # Optional fields may be None; give the model an explicit marker instead
        # of the literal string "None".
        summary = profile.summary or "Not specified"
        target_company_profile = profile.target_company_profile or "Not specified"
        keywords = ', '.join(profile.keywords)

        # A (str, Enum) member renders as "ExperienceLevel.X" inside an f-string on
        # Python 3.11+, so send its plain value (e.g. "senior") on every version.
        level = profile.experience_level
        experience_level = getattr(level, "value", level)

        return f"""
         Use the following information to search for the most relevant jobs relevant to the candidate's profile.
        <SUMMARY>
        {summary}
        </SUMMARY>
        <KEYWORDS>
        {keywords}
        </KEYWORDS>
        <TARGET_COMPANY_PROFILE>
        {target_company_profile}
        </TARGET_COMPANY_PROFILE>
        <EXPERIENCE_LEVEL>
        {experience_level}
        </EXPERIENCE_LEVEL>
        """

    async def get_mcp_server_params(self) -> list:
        """
        This function is used to get the MCP server parameters.
        @return: list - List of dictionaries containing the MCP server parameters
        @raise ValueError: if no Brave API key is configured
        """
        # Fail early with a clear message: a None value in the subprocess
        # environment would otherwise surface as an obscure launch error.
        if not self.brave_env.get("BRAVE_API_KEY"):
            raise ValueError("BRAVE_API_KEY is not set; the Brave search MCP server cannot be started")

        # Report only that the key is present - never print the secret itself.
        print("BRAVE_API_KEY is configured for the Brave search MCP server")
        return [
            {"command": "uvx", "args": ["mcp-server-fetch"]},
            {
                "command": "npx",
                "args": ["-y", "@modelcontextprotocol/server-brave-search"],
                "env": self.brave_env,
            }
        ]

    async def get_job_searcher_agent(self, mcp_server_list: list) -> Agent:
        """
        This function is used to create a job searcher agent that can be used to search for jobs based on the candidate's profile.
        @param mcp_server_list: list - MCP servers the agent may use
        @return: Agent - The job searcher agent
        """
        system_prompt = self.get_system_prompt()
        print(f"system_prompt:\n {system_prompt}")

        job_searcher_agent = Agent(
            name="JobSearcherAgent",
            instructions=system_prompt,
            mcp_servers=mcp_server_list,
            # Use the model chosen at construction time rather than the global default.
            model=self.model,
            output_type=JobPostingList,
        )

        return job_searcher_agent

    async def search_jobs(self) -> JobPostingList:
        """
        This function is used to search for jobs based on the candidate's profile.
        @return: JobPostingList - The list of job postings, or None if the run produced no result
        """
        job_postings = None

        user_prompt = self.get_user_prompt()
        print(f"user_prompt:\n {user_prompt}")

        mcp_server_params = await self.get_mcp_server_params()

        # The exit stack keeps every MCP server open for the duration of the run
        # and closes all of them afterwards, even if the run fails.
        async with AsyncExitStack() as stack:
            mcp_server_list = [
                await stack.enter_async_context(
                    MCPServerStdio(params, client_session_timeout_seconds=120)
                )
                for params in mcp_server_params
            ]

            job_searcher_agent = await self.get_job_searcher_agent(mcp_server_list)

            trace_id = gen_trace_id()
            print(f"Trace@ https://platform.openai.com/traces/trace?trace_id={trace_id}")
            # The first positional argument of trace() is the workflow name; the id has
            # to be passed as trace_id, otherwise the printed link points at the wrong trace.
            with trace("Job Searcher", trace_id=trace_id):
                job_postings = await Runner.run(job_searcher_agent, user_prompt)

        return job_postings.final_output if job_postings else None
    

In [None]:
job_postings = await JobSearcherAgent(profiled_resume, MODEL).search_jobs()

print(f"job_postings:\n {job_postings}")

In [None]:
# Print title, company and link of every posting, each followed by a divider.
for job_posting in job_postings.job_postings:
    print(
        job_posting.title,
        job_posting.company,
        job_posting.link,
        "--------------------------------\n",
        sep="\n",
    )
