# Job Post Scrutiniser

This script scrapes the contents of a job post from a web page, analyses the details, and then summarises the skills and qualifications required.

In [None]:
# imports

import os
from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI

# Load environment variables from a file called .env; override=True lets the
# values in .env replace variables already set in the process environment.

load_dotenv(override=True)
api_key = os.getenv('OPENROUTER_API_KEY')

# Sanity-check the key. Whitespace is tested before the prefix so that a key
# with a leading space or tab is reported as a whitespace problem rather than
# as having the wrong prefix.

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-"):
    # The message names the same prefix the condition actually checks.
    print("An API key was found, but it doesn't start sk-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

## The Web Scraper

In [3]:
from bs4 import BeautifulSoup
import requests


# Request headers sent with every page fetch; the User-Agent is a desktop
# Chrome browser string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}


def fetch_webpage_contents(url, timeout=30):
    """
    Return the title and visible body text of the web page at the given url.

    The page is requested with the module-level ``headers``. Any ``script``,
    ``style``, ``img`` and ``input`` elements are removed from the body before
    its text is extracted. The full text is returned; no truncation is applied.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, passed to
            ``requests.get`` so a stalled connection cannot hang forever.

    Returns:
        A string made of the page title, a blank line, then the body text
        (one line per text fragment). The title is "No title found" when the
        page has no usable ``<title>``; the text is empty when there is no
        ``<body>``.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    # soup.title.string is None when <title> is empty or contains nested tags,
    # so fall back to the placeholder in that case as well as when it is absent.
    title = (soup.title.string if soup.title else None) or "No title found"

    if soup.body:
        # Drop elements that carry no readable job-post text.
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        text = soup.body.get_text(separator="\n", strip=True)
    else:
        text = ""
    return title + "\n\n" + text


## Step 1: Create your prompts

In [4]:

# System prompt: tells the model to act as a job-post analyst, to ignore
# navigation text, and to answer in plain markdown with bullet points.
system_prompt = """
You are a smart assistant that analyzes the contents of a job post on a web page, and 
provides a summary of the skills and qualifications required, ignoring text that might be 
navigation related. Highlight the skills and qualifications in bullet points.
Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.
"""

# User prompt prefix: the request itself. It ends with blank lines because
# messages_for() concatenates the scraped page text directly after it.
user_prompt = """
Here are the contents of a job post from a website.
Please analyze the contents and come up with a summary of the skills and qualifications 
required. Highlight the skills and qualifications separately in bullet points.


"""

## Step 2: Make the messages list

In [5]:

def messages_for(webpage):
    """
    Build the chat messages for one job post.

    Returns a two-item list: the system message carrying ``system_prompt``,
    followed by the user message carrying ``user_prompt`` with the page text
    in ``webpage`` appended.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt + webpage}
    return [system_message, user_message]

## Step 3: Call the model through OpenRouter (using the OpenAI client)

In [6]:

# The OpenAI client is pointed at OpenRouter's API base URL and authenticated
# with the key read from OPENROUTER_API_KEY.
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
openrouter = OpenAI(
    api_key=api_key,
    base_url=OPENROUTER_BASE_URL,
)

def summarize(url, model):
    """
    Fetch the job post at ``url`` and return the model's summary of it.

    The page text is wrapped into chat messages with ``messages_for`` and sent
    to ``model`` through the ``openrouter`` client; the content of the first
    returned choice is the result.
    """
    page_text = fetch_webpage_contents(url)
    completion = openrouter.chat.completions.create(
        messages=messages_for(page_text),
        model=model,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
job_post_url = "https://job-boards.greenhouse.io/givedirectly/jobs/4558344005"
# OpenRouter identifies models as "<provider>/<model>", so the Anthropic
# namespace is required for the request to resolve to a model.
# NOTE(review): confirm the exact slug against the OpenRouter model list.
model = "anthropic/claude-sonnet-4.6"

# As the last expression in the cell, the raw summary string is echoed.
summarize(job_post_url, model)

### A function to display this nicely in the output, using markdown

In [8]:

def display_summary(url, model):
    """Summarise the job post at ``url`` with ``model`` and render the result as markdown in the notebook output."""
    display(Markdown(summarize(url, model)))

## Step 4: Print the result

In [None]:
# Sample job posts to try. Previously each URL was assigned to job_post_url in
# turn, so only the last one ever took effect; keeping them in a list makes
# the choice explicit. Change the index to analyse a different post.
sample_job_post_urls = [
    "https://www.wave.com/en/careers/job/5726127004/?source=LinkedIn",
    "https://degate.breezy.hr/p/2f4699404b60-senior-blockchain-engineer-vibe-coding",
    "https://job-boards.greenhouse.io/canonicaljobs/jobs/7105406?gh_src=x04l28f21us",
]
job_post_url = sample_job_post_urls[-1]
display_summary(job_post_url, model)