# Job Scraper

In [24]:
import pandas as pd
import requests
import re
from bs4 import BeautifulSoup
from datetime import date

In [25]:
# Date stamp (ISO format, YYYY-MM-DD) attached to every row collected in this run.
today = str(date.today())

# One karriere.at "controller" results page per region; the last path segment
# of each URL is the region name.
link_list = [
    f"https://www.karriere.at/jobs/controller/{region}"
    for region in ("wien", "linz", "salzburg", "graz", "innsbruck", "vorarlberg")
]

In [26]:
# Load the history written by earlier runs; on the very first run there is no
# data.csv yet, so start from an empty, typed frame instead.
try:
    df = pd.read_csv("data.csv")
except FileNotFoundError:
    # Column order and dtypes of the history table, declared in one place.
    empty_schema = {"date": str, "title": str, "location": str, "job_count": int}
    df = pd.DataFrame(columns=list(empty_schema)).astype(empty_schema)

In [None]:
# Scrape the current job count from every results page in `link_list` and
# append one row per successfully parsed page to `df`.
max_retries = 3  # attempts per page before giving up on it
new_rows = []  # collected here and concatenated onto `df` once, after the loop

for link in link_list:
    response = None

    # Retry on both transport errors and non-200 answers; stop at the first 200.
    for attempt in range(max_retries):
        try:
            response = requests.get(link, timeout=5)
            if response.status_code == 200:
                break
            else:
                print(
                    f"Attempt {attempt + 1}: Status code {response.status_code} from {link}"
                )
        except requests.RequestException as e:
            print(f"Attempt {attempt + 1}: Request failed for {link} - {e}")

    if response is None or response.status_code != 200:
        print(f"Failed to retrieve data from {link} after {max_retries} attempts")
        continue

    # The element matched by .m-jobsListHeader__title carries the headline
    # text, e.g. "372 Controller Jobs in Wien".
    soup = BeautifulSoup(response.content, "html.parser")
    header = soup.select_one(".m-jobsListHeader__title")
    if header is None:
        # select_one() returns None when nothing matches; skip this page
        # instead of aborting the whole run with an AttributeError.
        print(f"No job list header found on {link}, skipping")
        continue
    title = header.get_text().strip()

    # First number in the headline is the job count; 0 if there is no number.
    # NOTE(review): assumes counts of 1000+ are rendered with "." grouping
    # (German convention, e.g. "1.234") - confirm against a live page. Plain
    # digit runs are parsed exactly as before.
    match = re.search(r"\d{1,3}(?:\.\d{3})+(?!\d)|\d+", title)
    job_count = int(match.group(0).replace(".", "")) if match else 0

    # The region name is the last path segment of the URL.
    location = link.split("/")[-1]
    new_rows.append(
        {
            "date": today,
            "title": title,
            "location": location.capitalize(),
            "job_count": job_count,
        }
    )

# Single concat instead of re-copying the whole frame on every iteration.
if new_rows:
    df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

In [28]:
# Show the accumulated history (rows loaded from data.csv plus any rows scraped above).
# NOTE(review): the saved output lists lowercase locations although the scraping
# cell stores `location.capitalize()` - the output looks stale; re-run to confirm.
df

Unnamed: 0,date,title,location,job_count
0,2026-01-17,372 Controller Jobs in Wien,wien,372
1,2026-01-17,127 Controller Jobs in Linz,linz,127
2,2026-01-17,102 Controller Jobs in Salzburg,salzburg,102
3,2026-01-17,100 Controller Jobs in Graz,graz,100
4,2026-01-17,23 Controller Jobs in Innsbruck,innsbruck,23
5,2026-01-17,22 Controller Jobs in Vorarlberg,vorarlberg,22


In [29]:
# Persist the full history back to data.csv; index=False leaves the row index out of the file.
df.to_csv("data.csv", index=False)