In [1]:
import logging
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, TypeFilters, ExperienceLevelFilters, \
    OnSiteOrRemoteFilters, SalaryBaseFilters

# Change root logger level (default is WARN)
logging.basicConfig(level=logging.INFO)


# Fired once for each successfully processed job
def on_data(data: EventData):
    print('[ON_DATA]', data.title, data.company, data.company_link, data.date, data.link, data.insights,
          len(data.description))


# Fired once for each page (25 jobs)
def on_metrics(metrics: EventMetrics):
    print('[ON_METRICS]', str(metrics))


def on_error(error):
    print('[ON_ERROR]', error)


def on_end():
    print('[ON_END]')


scraper = LinkedinScraper(
    chrome_executable_path=None,  # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver)
    chrome_binary_location=None,  # Custom path to Chrome/Chromium binary (e.g. /foo/bar/chrome-mac/Chromium.app/Contents/MacOS/Chromium)
    chrome_options=None,  # Custom Chrome options here
    headless=True,  # Overrides headless mode only if chrome_options is None
    max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=0.5,  # Slow down the scraper to avoid 'Too many requests 429' errors (in seconds)
    page_load_timeout=40  # Page load timeout (in seconds)    
)

# Add event listeners
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

queries = [
    Query(
        query='Data Scientist',
        options=QueryOptions(
            locations=['United States'],
            apply_link=True,  # Try to extract apply link (easy applies are skipped). If set to True, scraping is slower because an additional page must be navigated. Default to False.
            skip_promoted_jobs=True,  # Skip promoted jobs. Default to False.
            page_offset=2,  # How many pages to skip
            limit=5,
            filters=QueryFilters(
                company_jobs_url='https://www.linkedin.com/jobs/search/?f_C=1441%2C17876832%2C791962%2C2374003%2C18950635%2C16140%2C10440912&geoId=92000000',  # Filter by companies.                
                relevance=RelevanceFilters.RECENT,
                time=TimeFilters.MONTH,
                type=[TypeFilters.FULL_TIME],
                on_site_or_remote=[OnSiteOrRemoteFilters.REMOTE],
                experience=[ExperienceLevelFilters.MID_SENIOR],
                base_salary=SalaryBaseFilters.SALARY_200K
            )
        )
    ),
]

scraper.run(queries)

INFO:li:scraper:('Using strategy AnonymousStrategy',)
INFO:li:scraper:('Starting new query', "Query(query=Data Scientist options=QueryOptions(limit=5 locations=['United States'] filters=QueryFilters(company_jobs_url=https://www.linkedin.com/jobs/search/?f_C=1441%2C17876832%2C791962%2C2374003%2C18950635%2C16140%2C10440912&geoId=92000000 relevance=RelevanceFilters.RECENT time=TimeFilters.MONTH base_salary=SalaryBaseFilters.SALARY_200K type=[<TypeFilters.FULL_TIME: 'F'>] experience=[<ExperienceLevelFilters.MID_SENIOR: '4'>] on_site_or_remote=[<OnSiteOrRemoteFilters.REMOTE: '2'>]) apply_link=True skip_promoted_jobs=True page_offset=2))")
INFO:li:scraper:('Chrome debugger url', 'http://localhost:61921')
INFO:li:scraper:('Websocket debugger url: ', 'ws://localhost:61921/devtools/page/A68EE5FA8D566A2240147FD2A6AF5EF1')
INFO:li:scraper:('[Data Scientist][United States]', 'Opening https://www.linkedin.com/jobs/search?keywords=Data+Scientist&location=United+States&f_C=1441%2C17876832%2C791962%2C

[ON_DATA] Senior Data Scientist, Product Google  2024-08-04 https://www.linkedin.com/jobs/view/senior-data-scientist-product-at-google-3932604692?position=1&pageNum=0&refId=PrPeK5UGwAJK4WZ9P4Glgg%3D%3D&trackingId=2cCgORdVpVIBPhevIYH1Tw%3D%3D&trk=public_jobs_jserp-result_search-card [] 4339


INFO:li:scraper:('[Data Scientist][United States][2]', 'Processed')


[ON_DATA] Senior Data Scientist, Research Google  2024-08-10 https://www.linkedin.com/jobs/view/senior-data-scientist-research-at-google-3978818933?position=2&pageNum=0&refId=PrPeK5UGwAJK4WZ9P4Glgg%3D%3D&trackingId=LnWIalWAirtg9wDMac%2B6ZA%3D%3D&trk=public_jobs_jserp-result_search-card [] 4069


INFO:li:scraper:('[Data Scientist][United States][3]', 'Processed')


[ON_DATA] Senior Data Scientist, Research Google  2024-08-15 https://www.linkedin.com/jobs/view/senior-data-scientist-research-at-google-3965994762?position=3&pageNum=0&refId=PrPeK5UGwAJK4WZ9P4Glgg%3D%3D&trackingId=z8o%2Bfr%2Baq9hGtTL64D3Scg%3D%3D&trk=public_jobs_jserp-result_search-card [] 4069


INFO:li:scraper:('[Data Scientist][United States][4]', 'Processed')


[ON_DATA] Senior Data Scientist, Product Google  2024-08-08 https://www.linkedin.com/jobs/view/senior-data-scientist-product-at-google-3959145632?position=4&pageNum=0&refId=PrPeK5UGwAJK4WZ9P4Glgg%3D%3D&trackingId=AgWlq4Sb0pYURllK2eV9eA%3D%3D&trk=public_jobs_jserp-result_search-card [] 4339


ERROR:li:scraper:('[Data Scientist][United States][5]', 'Timeout on loading job details')
NoneType: None
ERROR:li:scraper:('[Data Scientist][United States][5]', 'Timeout on loading job details')
NoneType: None
INFO:li:scraper:('[Data Scientist][United States][5]', 'Processed')


[ON_DATA] Senior Data Scientist, Product Google  2024-08-15 https://www.linkedin.com/jobs/view/senior-data-scientist-product-at-google-4000193729?position=7&pageNum=0&refId=PrPeK5UGwAJK4WZ9P4Glgg%3D%3D&trackingId=DhCqBZufAWSrCAYouDI%2FMw%3D%3D&trk=public_jobs_jserp-result_search-card [] 4339
[ON_END]
