# End of week 1 exercise

To demonstrate your familiarity with the OpenAI API, and also Ollama, build a tool that takes a technical question,  
and responds with an explanation. This is a tool that you will be able to use yourself during the course!

This is a job search utility that fetches and cleans job listings from the Greenhouse API before passing them to an LLM for intelligent role matching. It successfully implements structured JSON outputs via the OpenRouter API to evaluate and return relevant positions based on core responsibilities and skills.

In [None]:
import os
import json
import requests
from bs4 import BeautifulSoup
from openai import OpenAI
from dotenv import load_dotenv

In [None]:
# Pull settings from the .env file (override=True is forwarded to load_dotenv),
# then look up the OpenRouter key that the client cell needs.
load_dotenv(override=True)
api_key = os.environ.get('OPENROUTER_API_KEY')

# Only warn here; later cells still run so the notebook can be inspected.
if api_key is None or api_key == "":
    print("No API key was found! Please check your .env file.")

In [None]:
# Default model id used by find_matching_jobs.
# NOTE(review): OpenRouter model ids are usually provider-prefixed
# (e.g. "openai/gpt-4o-mini") - confirm this bare id resolves as intended.
MODEL_GPT = "gpt-4o-mini"

# OpenRouter exposes an OpenAI-compatible endpoint, so the stock OpenAI client
# is simply pointed at its base URL.
OPEN_ROUTER_BASE_URL = "https://openrouter.ai/api/v1"
client = OpenAI(
    base_url=OPEN_ROUTER_BASE_URL,
    api_key=api_key,
)

In [None]:
# --- Fetching and Cleaning Data from the API ---
def fetch_jobs_from_api(api_url, max_jobs=30, timeout=30, max_description_chars=1500):
    """
    Fetch job postings from a Greenhouse job-board endpoint as plain-text records.

    Only the first ``max_jobs`` postings are processed, and each description is
    truncated, to avoid exceeding LLM context limits.

    Args:
        api_url: Greenhouse jobs endpoint; descriptions are only present when
            the URL asks for them (e.g. ``?content=true``).
        max_jobs: Maximum number of postings to keep from the response.
        timeout: Seconds to wait for the HTTP request before giving up.
        max_description_chars: Maximum characters kept from each description.

    Returns:
        A list of dicts with the keys ``title``, ``location``, ``url`` and
        ``description``. Missing fields are ``None`` (``description`` is ``""``).

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx status (via ``raise_for_status``).
    """
    # Local stdlib import so this cell does not rely on another cell's imports.
    import html

    print(f"Fetching job data from API: {api_url}...")

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(api_url, timeout=timeout)
    response.raise_for_status()

    data = response.json()

    simplified_jobs = []

    # `or []` / `or {}` / `or ""` guard against keys that are present but null;
    # dict.get's default only applies when the key is missing entirely.
    for job in (data.get('jobs') or [])[:max_jobs]:

        # Greenhouse documents `content` as HTML with its markup converted to
        # entities (&lt;p&gt;...). Decode it first, otherwise the parser sees
        # no tags and the "clean" text still contains literal <p>...</p>.
        raw_html = html.unescape(job.get("content") or "")

        # Strip the HTML tags to get clean, readable text
        clean_text = BeautifulSoup(raw_html, "html.parser").get_text(separator=" ", strip=True)

        location = job.get("location") or {}

        simplified_jobs.append({
            "title": job.get("title"),
            "location": location.get("name"),
            "url": job.get("absolute_url"),
            # truncate the description to save tokens
            "description": clean_text[:max_description_chars],
        })

    print(f"Successfully fetched and cleaned {len(simplified_jobs)} jobs.")
    return simplified_jobs

In [None]:
# --- Extracting All Matching Jobs with the LLM ---
def find_matching_jobs(jobs_list, desired_job, llm_model=MODEL_GPT):
    """
    Ask the LLM to select every job in ``jobs_list`` that fits ``desired_job``.

    Args:
        jobs_list: JSON-serialisable list of job dicts (title, location, url,
            description) to be evaluated.
        desired_job: Free-text description of the role the user wants.
        llm_model: Model identifier passed to the chat-completions request.

    Returns:
        The dict parsed from the model's JSON reply. The prompt asks for a
        single ``"matches"`` key holding a list of match objects. When
        ``jobs_list`` is empty no request is made and ``{"matches": []}`` is
        returned.

    Raises:
        ValueError: if the reply carries no text, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass), or is
            valid JSON but not an object.
    """
    # Nothing to evaluate: skip the (billed) API call entirely.
    if not jobs_list:
        print("No jobs to evaluate; skipping the LLM call.")
        return {"matches": []}

    print(f"Asking the LLM to find ALL matches for: '{desired_job}'...")

    system_prompt = (
        "You are an intelligent career matchmaking assistant. "
        "You will be provided with a JSON list of available company jobs, including their descriptions. "
        "Your task is to thoroughly read the job descriptions and find ALL jobs that match the user's desired role. "
        "Do not rely on the job title alone; verify the responsibilities and requirements.\n\n"
        "You MUST return a valid JSON object with a single key called 'matches'. "
        "'matches' must be a list of objects, where each object has the exact following keys:\n"
        "- job_title (string)\n"
        "- location (string)\n"
        "- url (string)\n"
        "- job_description_summary (string - a 2 sentence summary of the role)\n"
        "- required_skills (list of strings - extract 3 to 5 core technical or soft skills required)\n"
        "- match_description (string - explain briefly why this matches the user's criteria)\n\n"
        "If no jobs match the criteria, return an empty list for 'matches'."
    )

    # ensure_ascii=False keeps non-ASCII text readable instead of expanding
    # every such character into a \uXXXX escape, which only inflates the prompt.
    jobs_json = json.dumps(jobs_list, ensure_ascii=False)
    user_content = f"Desired Job Role: {desired_job}\n\nAvailable Jobs:\n{jobs_json}"

    response = client.chat.completions.create(
        model=llm_model,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        temperature=0.1,
    )

    # The message content may be absent (e.g. a refusal or truncated reply);
    # fail with a clear message rather than a TypeError from json.loads(None).
    reply_text = response.choices[0].message.content if response.choices else None
    if not reply_text:
        raise ValueError("The LLM returned an empty response.")

    parsed = json.loads(reply_text)

    # Callers use dict access on the result, so reject any other JSON shape here.
    if not isinstance(parsed, dict):
        raise ValueError(
            f"Expected a JSON object from the LLM, got {type(parsed).__name__}."
        )
    return parsed

In [None]:
# --- Main Execution ---
def search_for_jobs(target_job_title, api_url, max_jobs_to_fetch):
    """
    Run the whole pipeline: fetch postings, ask the LLM for matches, print them.

    Args:
        target_job_title: Role description handed to the matcher.
        api_url: Job-board endpoint to fetch postings from.
        max_jobs_to_fetch: Upper bound on postings sent to the matcher.

    Returns:
        None. Results, or the message of any exception raised along the way,
        are printed instead of being returned or re-raised.
    """
    try:
        candidates = fetch_jobs_from_api(api_url, max_jobs_to_fetch)
        llm_result = find_matching_jobs(candidates, target_job_title)
        found = llm_result.get("matches", [])

        if found:
            print(f"\n FOUND {len(found)} MATCHING JOBS:\n")
            # Dump the full reply, not just the list, so the output keeps the 'matches' wrapper.
            print(json.dumps(llm_result, indent=4))
        else:
            print(f"\nNo jobs found matching '{target_job_title}'.")
    except Exception as err:
        # Top-level boundary of the notebook: report and carry on rather than raise.
        print(f"An error occurred: {err}")

In [None]:
# Cap on how many postings are pulled from the board and sent to the LLM.
MAX_JOBS_TO_FETCH = 30

# Search the DoorDash Greenhouse board; content=true asks for the descriptions too.
search_for_jobs(
    target_job_title="software engineering",
    api_url='https://boards-api.greenhouse.io/v1/boards/doordashusa/jobs?content=true',
    max_jobs_to_fetch=MAX_JOBS_TO_FETCH,
)