# AI Job Board Scraper (Live)

Code authored by: Shaw Talebi

### imports

In [1]:
import re
from pathlib import Path
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

### grab job urls from webpage

In [2]:
# listing page that is fetched below and searched for job posting links
base_url = "https://aijobs.ai/united-states"

In [3]:
# download the listing page; the timeout keeps the cell from hanging forever
# on a stalled connection, and raise_for_status() stops here on an HTTP error
# instead of silently parsing an error page into an empty link list
res = requests.get(base_url, timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.text, "html.parser")

In [4]:
# every <a> element whose href contains "/job/"
a_tags = soup.select('a[href*="/job/"]')

# resolve each non-empty href against base_url so relative links become
# absolute job posting URLs
job_links = [
    urljoin(base_url, tag.get("href"))
    for tag in a_tags
    if tag.get("href")
]

In [18]:
# display the collected job posting URLs
job_links

['https://aijobs.ai/job/ai-agent-engineer-13',
 'https://aijobs.ai/job/full-time-ai-developer-stealth-saas-project-ai-first-high-growth',
 'https://aijobs.ai/job/full-time-ai-engineer-for-long-term-role-in-self-funded-company',
 'https://aijobs.ai/job/online-data-research-1',
 'https://aijobs.ai/job/full-stack-software-engineer-13',
 'https://aijobs.ai/job/ai-prompt-engineer-part-time-flexible-schedule',
 'https://aijobs.ai/job/senior-software-engineer-agents',
 'https://aijobs.ai/job/lead-software-engineer-ml-backend',
 'https://aijobs.ai/job/staff-engineer-software-autonomy-applications-r3166-3',
 'https://aijobs.ai/job/ml-application-security-engineer',
 'https://aijobs.ai/job/solutions-engineering-senior-manager',
 'https://aijobs.ai/job/ml-engineer-llm-evaluation',
 'https://aijobs.ai/job/ml-engineer-llm-safety',
 'https://aijobs.ai/job/ml-engineer-llm-privacy',
 'https://aijobs.ai/job/ml-research-scientist-llm-safety',
 'https://aijobs.ai/job/staff-software-engineer-34',
 'https:

### scrape job data from urls

In [6]:
def _format_salary_range(text):
    """
    Build a display string from the numbers found in a salary label.

    Args:
        text (str): Raw text of the salary element.

    Returns:
        str or None: "$low - $high" when at least two numbers are found
        (numbers after the second are ignored), "$value" when exactly one
        is found, or None when the text contains no numbers.
    """
    # digits with optional comma-separated groups and an optional decimal
    # part, so "35.50" stays a single number and a trailing comma is not
    # captured as part of the value
    numbers = re.findall(r"\d+(?:,\d+)*(?:\.\d+)?", text)
    if not numbers:
        return None
    if len(numbers) == 1:
        return f"${numbers[0]}"
    return f"${numbers[0]} - ${numbers[1]}"


def get_job_data(job_link, timeout=30):
    """
    Scrape job details from a given job posting URL.

    Args:
        job_link (str): The full URL of the job posting page.
        timeout (float): Seconds to wait for the server before giving up,
            so one stalled page cannot hang the whole scrape.

    Returns:
        dict: A dictionary containing:
            - "Job Title" (str or None): The job title text, if found.
            - "Job Description" (str or None): The job description text,
              with line breaks preserved, if found.
            - "Salary Range:" (str or None): The salary range as a formatted
              string (e.g., "$50,000 - $70,000"), or a single value if only
              one number is found, or None if no salary is listed.
              NOTE: the key really does end with a colon; it is left as-is
              so existing consumers of this column keep working.

    Raises:
        Any exception raised by requests.get (including a timeout) is
        propagated to the caller.
    """

    # create soup object
    res = requests.get(job_link, timeout=timeout)
    soup = BeautifulSoup(res.text, "html.parser")

    # get job title
    title_el = soup.select_one(".post-main-title2")
    job_title = title_el.get_text(strip=True) if title_el else None

    # get job description (one line of text per element, joined by newlines)
    desc_el = soup.select_one(".job-description-container")
    job_description = desc_el.get_text(separator="\n", strip=True) if desc_el else None

    # get salary
    salary_el = soup.select_one(".salery h2")   # note: the site spells it "salery"
    salary_range = None
    if salary_el:
        salary_range = _format_salary_range(salary_el.get_text(strip=True))

    return {
        "Job Title": job_title,
        "Job Description": job_description,
        "Salary Range:": salary_range,
    }

In [7]:
# scrape every collected posting, one result dict per URL, in link order
job_data_list = [get_job_data(job_link) for job_link in job_links]

In [8]:
# display the scraped job records
job_data_list

[{'Job Title': 'AI Agent Engineer',
  'Job Description': "Role Overview\nWe are seeking a highly skilled\nAI Agent Engineer\nto design, build, and deploy advanced AI agents that leverage cutting-edge large language models (LLMs) to solve complex real-world problems. These AI agents are intricate software systems that simulate human-like reasoning, decision-making, and creativity to tackle business challenges autonomously.\nIn this pivotal role, you will drive the development of intelligent agents from concept through production, integrating them into scalable applications and ensuring they operate reliably at scale. You'll be instrumental in building the core intelligence of our products, working across the entire AI/ML lifecycle in a collaborative, high-trust environment.\nThis role is fully remote and open to candidates globally\n, allowing you to contribute from anywhere while collaborating with a diverse, distributed team.\nKey Responsibilities\nAgent Development & Architecture\nDe

### save job data to file

In [11]:
# tabulate the scraped records: one row per job, one column per dict key
df = pd.DataFrame(job_data_list)

In [13]:
# preview the first 10 rows
df.head(10)

Unnamed: 0,Job Title,Job Description,Salary Range:
0,AI Agent Engineer,Role Overview\nWe are seeking a highly skilled...,
1,Full-Time AI Developer – Stealth SaaS Project ...,We’re building a cutting-edge B2B SaaS platfor...,"$70,000 - $250,001"
2,Full-Time AI Engineer for Long-Term Role in Se...,"I'm the founder of a successful, self-funded c...","$100,000 - $200,000"
3,Online Data Research,Job Description\nLooking for a freelance oppor...,
4,Full-Stack Software Engineer,About this role:\nAs a Software Engineer at Co...,"$10,000"
5,"AI Prompt Engineer - PART TIME, FLEXIBLE SCHEDULE",Looking for an AI Prompt Engineer OR a very kn...,$35 - $50
6,Senior Software Engineer - Agents,About xAI\nxAI’s mission is to create AI syste...,"$180,000 - $440,000"
7,"Lead Software Engineer, ML Backend","At Dynamo AI, our mission is to empower every ...",
8,"Staff Engineer, Software Autonomy Applications...","Founded in 2015, Shield AI is a venture-backed...",
9,ML Application Security Engineer,"At\nDynamo AI\n, we believe that LLMs must be ...",


In [14]:
# write the table to data/job_data.csv, creating the data/ directory first so
# the save does not fail on a fresh checkout where it does not exist yet
output_path = Path('data/job_data.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path)