In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time


def scroll_to_end(driver, pause_time=2):
    # Get initial scroll height
    last_height = driver.execute_script("return document.body.scrollHeight")

    while True:
        # Scroll down to the bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Wait to load the page
        time.sleep(pause_time)

        # Calculate new scroll height after scrolling
        new_height = driver.execute_script("return document.body.scrollHeight")

        # Check if the scroll height has changed
        if new_height == last_height:
            break  # If heights are the same, exit the loop

        last_height = new_height


def scrape_linkedin_jobs(job_title, location):
    # Set up Chrome options for headless mode
    chrome_options = Options()
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument("--disable-infobars")  # Disable the "Chrome is being controlled by automated test software" message
    chrome_options.add_argument("--disable-extensions")  # Disable Chrome extensions that may flag the session
    chrome_options.add_argument("--no-sandbox")  # Use this for running on systems without graphical environment (like cloud)

    # Set up the driver (make sure chromedriver is in your PATH)
    driver = webdriver.Chrome(options=chrome_options)

    # Navigate to LinkedIn jobs search page
    linkedin_url = "https://www.linkedin.com/jobs/search/"
    driver.get(linkedin_url)
    time.sleep(2)  # Wait for the page to load
    try:
        driver.find_element(By.CLASS_NAME,'contextual-sign-in-modal__modal-dismiss-icon').click()
    except:
        print('login dismiss not found')
        

    # Input the job title
    search_title_box = driver.find_element(By.XPATH, '//input[@aria-label="Search job titles or companies"]')
    search_title_box.send_keys(job_title)
    
    # Input the location
    search_location_box = driver.find_element(By.XPATH, '//input[@aria-label="Location"]')
    search_location_box.clear()  # Clear the default location
    search_location_box.send_keys(location)
    
    # Click the search button
    search_button = driver.find_element(By.CSS_SELECTOR, '#jobs-search-panel > form > button > icon > svg')
    search_button.click()

    
    time.sleep(3)  # Wait for search results to load

    for i in range (8):
        scroll_to_end(driver)


    # Scrape the job listings
    job_listings = []
    job_elements = driver.find_elements(By.CLASS_NAME, 'jobs-search__results-list')
    job_elements = job_elements[0].find_elements(By.TAG_NAME,'li')

    for job_element in job_elements:
        try:
            job_title = job_element.find_element(By.CLASS_NAME, 'base-search-card__info').text
            company_name = job_element.find_element(By.CLASS_NAME, 'base-search-card__subtitle').text
            location = job_element.find_element(By.CLASS_NAME, 'job-search-card__location').text
            job_link = job_element.find_element(By.TAG_NAME, 'a').get_attribute('href')

            link = job_element.find_element(By.CLASS_NAME, 'base-card__full-link')
            link.click()

            time.sleep(3)

            description = driver.find_element(By.CLASS_NAME,'show-more-less-html__markup').text

            job_listings.append({
                'Job Title': job_title,
                'Company': company_name,
                'Location': location,
                'Link': job_link,
                'Description': description
            })
        except Exception as e:
            print(f"Error while scraping job: {e}")
            continue
    
    # Close the driver
    driver.quit()

    return job_listings


In [2]:

# Example usage
if __name__ == "__main__":
    job_title = "Data Engineer"
    location = "Montreal, Quebec, Canada"
    
    jobs = scrape_linkedin_jobs(job_title, location)
    
    for i, job in enumerate(jobs, start=1):
        print(f"Job {i}:")
        print(f"Title: {job['Job Title']}")
        print(f"Company: {job['Company']}")
        print(f"Location: {job['Location']}")
        print(f"Link: {job['Link']}")
        print("-" * 20)


Error while scraping job: Message: element click intercepted: Element <a class="base-card__full-link absolute top-0 right-0 bottom-0 left-0 p-0 z-[2]" href="https://ca.linkedin.com/jobs/view/data-engineer-part-time-at-daro-4011014318?position=1&amp;pageNum=0&amp;refId=eTEWfa21Sne9KkSUxWEd8Q%3D%3D&amp;trackingId=Ellcyt3T3PZTC4epFUkvzA%3D%3D&amp;trk=public_jobs_jserp-result_search-card" data-tracking-control-name="public_jobs_jserp-result_search-card" data-tracking-client-ingraph="" data-tracking-will-navigate="">...</a> is not clickable at point (509, 66). Other element would receive the click: <div class="artdeco-global-alert__content">...</div>
  (Session info: chrome=128.0.6613.138)
Stacktrace:
	GetHandleVerifier [0x00007FF71F7B9412+29090]
	(No symbol) [0x00007FF71F72E239]
	(No symbol) [0x00007FF71F5EB1DA]
	(No symbol) [0x00007FF71F646C1E]
	(No symbol) [0x00007FF71F644692]
	(No symbol) [0x00007FF71F641BAB]
	(No symbol) [0x00007FF71F640DC5]
	(No symbol) [0x00007FF71F632D81]
	(No symbo

In [4]:
import pandas as pd
jobs_df = pd.DataFrame(jobs)
jobs_df