In [3]:
import csv
import re
import urllib.parse
from typing import Optional
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Site root; relative job links found in the result list are resolved against it.
BASE_DOMAIN = "https://www.jobscout24.ch"

# Search criteria.
JOB = "data scientist"
CITY = 1200  # the `psz` query parameter works with a postal code
ORDER = 1  # sort by date

# Destination file for the scraped offers.
OUTPUT_CSV = "jobscout24_results.csv"

# HTTP headers sent with every request.
HEADERS = {
    "User-Agent": " ".join(
        [
            "Mozilla/5.0 (Windows NT 10.0; Win32; x32)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/129.0 Safari/537.36",
        ]
    ),
    "Accept-Language": "fr-FR,fr;q=0.9,en;q=0.8",
}

def build_search_url(
    page: int,
    *,
    job: Optional[str] = None,
    city: Optional[int] = None,
    order: Optional[int] = None,
    base_domain: Optional[str] = None,
) -> str:
    """Build the URL of one page of search results.

    Args:
        page: Page number, sent as the ``p`` query parameter.
        job: Search keywords; defaults to the module-level ``JOB``.
        city: Value of the ``psz`` query parameter; defaults to ``CITY``.
        order: Value of the ``sort`` query parameter; defaults to ``ORDER``.
        base_domain: Scheme and host of the site; defaults to ``BASE_DOMAIN``.

    Returns:
        The absolute URL of the requested result page.
    """
    # Module constants are only looked up when no override is supplied.
    job = JOB if job is None else job
    city = CITY if city is None else city
    order = ORDER if order is None else order
    base = (BASE_DOMAIN if base_domain is None else base_domain).rstrip("/")

    # safe="" also escapes "/" so the keywords stay a single path segment.
    job_segment = urllib.parse.quote(job, safe="")
    # urlencode escapes each value, so the query stays valid for any input.
    query = urllib.parse.urlencode({"psz": city, "sort": order, "p": page})
    return f"{base}/fr/jobs/{job_segment}/?{query}"


def get_soup(url: str) -> BeautifulSoup:
    """Download ``url`` and return its HTML parsed with the lxml parser.

    The request is sent with the module-level ``HEADERS`` and a 20-second
    timeout. ``raise_for_status()`` is called on the response before parsing.
    """
    response = requests.get(url, timeout=20, headers=HEADERS)
    response.raise_for_status()
    html = response.text
    return BeautifulSoup(html, "lxml")


def get_total_pages(soup: BeautifulSoup) -> int:
    """Return the total number of result pages shown by the pager.

    Reads the first ``<li>`` inside ``div.pages``, for example
    ``<li>Page 1 / 3</li>``, and returns the number after the slash (3).
    Returns 1 when that element is missing or its text has no ``/ N`` part.
    """
    pager_item = soup.select_one("div.pages li")
    if pager_item:
        match = re.search(r"/\s*(\d+)", pager_item.get_text(strip=True))
        if match:
            return int(match.group(1))
    return 1


def extract_jobs_from_page(soup: BeautifulSoup):
    """Collect the job offers listed on one result page.

    Each ``li.job-list-item`` that contains a title link becomes a dict with:

    - ``title``: the link's ``title`` attribute, or its text when that is empty
    - ``url``: the link's ``href`` resolved against ``BASE_DOMAIN``
    - ``company`` / ``city``: first and second ``<span>`` of
      ``p.job-attributes`` (``None`` when absent)
    - ``tags``: texts of ``div.job-tags span`` joined with ``", "``
      (e.g. 100%, PME...)
    - ``date``: text of ``p.job-date`` (``None`` when absent)

    Returns:
        A list of those dicts, in page order.
    """
    results = []

    for item in soup.select("li.job-list-item"):
        link = item.select_one("a.job-link-detail.job-title")
        if not link:
            # Without a title link there is nothing to identify the offer.
            continue

        # Company and city are the first two spans of the attributes line.
        attributes = item.select_one("p.job-attributes")
        span_texts = (
            [span.get_text(strip=True) for span in attributes.select("span")]
            if attributes
            else []
        )
        company = span_texts[0] if span_texts else None
        city = span_texts[1] if len(span_texts) > 1 else None

        # Tags (80%-100%, PME, etc.) flattened into one comma-separated string.
        tag_text = ", ".join(
            node.get_text(strip=True) for node in item.select("div.job-tags span")
        )

        date_tag = item.select_one("p.job-date")
        posted = None if not date_tag else date_tag.get_text(strip=True)

        results.append(
            {
                "title": link.get("title") or link.get_text(strip=True),
                "url": urljoin(BASE_DOMAIN, link.get("href")),
                "company": company,
                "city": city,
                "tags": tag_text,
                "date": posted,
            }
        )

    return results


def save_csv(jobs, filename):
    """Write the scraped jobs to a UTF-8 CSV file with a header row.

    Args:
        jobs: Iterable of dicts, one per job offer.
        filename: Path of the CSV file to create (overwritten if it exists).

    When ``jobs`` is empty nothing is written and the file is left untouched;
    a message is printed instead.
    """
    # Materialize once: the rows are scanned for column names, then written.
    rows = list(jobs)
    if not rows:
        # Checked before open() so an empty run never truncates an existing file.
        print(f"⚠️ Aucun job à sauvegarder, {filename} non créé.")
        return

    # Ordered union of all keys, so a row with an extra or missing column
    # cannot make DictWriter raise; missing values are written as "".
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))

    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
    print(f"\n💾 Sauvegardé dans : {filename}\n")


def main():
    """Scrape every result page of the configured search and save the offers.

    Fetches the first page to read the page count, extracts the offers from
    each page, and writes them to ``OUTPUT_CSV``. When no offer was found, a
    message is printed and no file is written.
    """
    print("🔎 Récupération de la première page...")
    first_page_soup = get_soup(build_search_url(1))

    total_pages = get_total_pages(first_page_soup)
    print(f"📄 Nombre total de pages : {total_pages}")

    all_jobs = []

    for page in range(1, total_pages + 1):
        print(f"\n=== 📌 Page {page}/{total_pages} ===")
        # Page 1 was already downloaded to read the page count; reuse it
        # instead of requesting the same URL a second time.
        if page == 1:
            soup = first_page_soup
        else:
            soup = get_soup(build_search_url(page))

        jobs = extract_jobs_from_page(soup)

        print(f"➡ {len(jobs)} offres trouvées.")
        all_jobs.extend(jobs)

    print(f"\n📊 Total global : {len(all_jobs)} offres collectées.")

    if all_jobs:
        save_csv(all_jobs, OUTPUT_CSV)
    else:
        print("⚠️ Aucun job trouvé, CSV non créé.")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

🔎 Récupération de la première page...
📄 Nombre total de pages : 1

=== 📌 Page 1/1 ===
➡ 3 offres trouvées.

📊 Total global : 3 offres collectées.

💾 Sauvegardé dans : jobscout24_results.csv

