# In [5]:
import json
from pathlib import Path

from datatypes import converter, JobPosting
from util import get_html, fetch_job_pages, structure_job_postings, job_postings_to_df, plot_salary_ranges

# In [6]:
# Imported here so this cell is self-contained; urljoin resolves each posting
# href (relative, root-relative, or already absolute) against the careers URL.
from urllib.parse import urljoin

# Careers listing page that is scanned for links to individual postings.
ABCELLERA_CAREERS_URL = "https://abcellera.com/careers-openings/"
# Salary handed to the plot as the value to benchmark against.
USER_SALARY = 80000
# On-disk cache of the structured postings; delete it to force a fresh scrape.
JOB_POSTINGS_PATH = Path("job_postings.json")

if not JOB_POSTINGS_PATH.exists():
    # No cache yet: scrape the careers page, then fetch and structure each posting.
    # NOTE(review): get_html presumably returns a BeautifulSoup-like object
    # (find_all / .string / .get are used below) -- confirm in util.
    careers_page = get_html(ABCELLERA_CAREERS_URL)
    links = careers_page.find_all("a")
    job_page_links = [
        urljoin(ABCELLERA_CAREERS_URL, link.get("href"))
        for link in links
        # Keep only the "View position" anchors, and skip anchors with no href
        # so that no bogus "...None" URL is requested.
        if link.string and "View position" in link.string and link.get("href")
    ]
    job_pages = fetch_job_pages(job_page_links)
    job_postings = structure_job_postings(job_pages)
    # Write to the same path that the existence check above inspects.
    with JOB_POSTINGS_PATH.open("w", encoding="utf-8") as fh:
        json.dump(converter.unstructure(job_postings), fh)
else:
    # Cache hit: rebuild the JobPosting objects from the stored JSON.
    with JOB_POSTINGS_PATH.open(encoding="utf-8") as fh:
        job_postings = converter.structure(json.load(fh), list[JobPosting])

job_postings_df = job_postings_to_df(job_postings)

# Render the salary ranges and export both an HTML and a PNG version.
fig = plot_salary_ranges(job_postings_df, salary_to_benchmark_against=USER_SALARY)
fig.write_html("abcellera_salaries.html")
fig.write_image("abcellera_salaries.png", width=2000, height=1000)