In [13]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time

def scrape_linkedin_jobs(job_title, location):
    # Set up Chrome options for headless mode
    chrome_options = Options()
    # chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    
    # Set up the driver (make sure chromedriver is in your PATH)
    driver = webdriver.Chrome(options=chrome_options)

    # Navigate to LinkedIn jobs search page
    linkedin_url = "https://www.linkedin.com/jobs/search/"
    driver.get(linkedin_url)
    time.sleep(2)  # Wait for the page to load
    try:
        driver.find_element(By.CLASS_NAME,'contextual-sign-in-modal__modal-dismiss-icon').click()
    except:
        print('login dismiss not found')
        
    # Input the job title
    search_title_box = driver.find_element(By.XPATH, '//input[@aria-label="Search job titles or companies"]')
    search_title_box.send_keys(job_title)
    
    # Input the location
    search_location_box = driver.find_element(By.XPATH, '//input[@aria-label="Location"]')
    search_location_box.clear()  # Clear the default location
    search_location_box.send_keys(location)
    
    # Click the search button
    search_button = driver.find_element(By.CSS_SELECTOR, '#jobs-search-panel > form > button > icon > svg')
    search_button.click()

    
    time.sleep(3)  # Wait for search results to load

    # Scrape the job listings
    job_listings = []
    job_elements = driver.find_elements(By.CLASS_NAME, 'jobs-search__results-list')
    job_elements = job_elements[0].find_elements(By.TAG_NAME,'li')

    for job_element in job_elements:
        try:
            job_title = job_element.find_element(By.CLASS_NAME, 'base-search-card__info').text
            company_name = job_element.find_element(By.CLASS_NAME, 'base-search-card__subtitle').text
            location = job_element.find_element(By.CLASS_NAME, 'job-search-card__location').text
            job_link = job_element.find_element(By.TAG_NAME, 'a').get_attribute('href')

            job_listings.append({
                'Job Title': job_title,
                'Company': company_name,
                'Location': location,
                'Link': job_link
            })
        except Exception as e:
            print(f"Error while scraping job: {e}")
            continue
    
    # Close the driver
    driver.quit()

    return job_listings


In [14]:

# Example usage
if __name__ == "__main__":
    job_title = "Data Engineer"
    location = "Montreal, Quebec, Canada"
    
    jobs = scrape_linkedin_jobs(job_title, location)
    
    for i, job in enumerate(jobs, start=1):
        print(f"Job {i}:")
        print(f"Title: {job['Job Title']}")
        print(f"Company: {job['Company']}")
        print(f"Location: {job['Location']}")
        print(f"Link: {job['Link']}")
        print("-" * 20)


login dismiss not found
Job 1:
Title: Data Engineer, Part Time
DARO
Montreal, Quebec, Canada
2 weeks ago
Company: DARO
Location: Montreal, Quebec, Canada
Link: https://ca.linkedin.com/jobs/view/data-engineer-part-time-at-daro-4011014318?position=1&pageNum=0&refId=23fm%2FpsXgHjEELdlAuA7%2Fw%3D%3D&trackingId=r51bd4AFS0yp3yuvldJm2w%3D%3D&trk=public_jobs_jserp-result_search-card
--------------------
Job 2:
Title: Data Engineer, Quantitative Hedge Fund
Nascent
Montreal, Quebec, Canada
Be an early applicant
1 month ago
Company: Nascent
Location: Montreal, Quebec, Canada
Link: https://ca.linkedin.com/jobs/view/data-engineer-quantitative-hedge-fund-at-nascent-4017711588?position=2&pageNum=0&refId=23fm%2FpsXgHjEELdlAuA7%2Fw%3D%3D&trackingId=x64xn70cXJEA5lZjD4DB5Q%3D%3D&trk=public_jobs_jserp-result_search-card
--------------------
Job 3:
Title: Data Engineer
FLiiP
Brossard, Quebec, Canada
Actively Hiring
4 days ago
Company: FLiiP
Location: Brossard, Quebec, Canada
Link: https://ca.linkedin.com/j

In [23]:
import pandas as pd

jobs_df = pd.DataFrame(jobs)

In [24]:
jobs_df


Unnamed: 0,Job Title,Company,Location,Link
0,"Data Engineer, Part Time\nDARO\nMontreal, Queb...",DARO,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-enginee...
1,"Data Engineer, Quantitative Hedge Fund\nNascen...",Nascent,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-enginee...
2,"Data Engineer\nFLiiP\nBrossard, Quebec, Canada...",FLiiP,"Brossard, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-enginee...
3,Junior Software Engineer (Remote)\nPolicyMe\nG...,PolicyMe,Greater Montreal Metropolitan Area,https://ca.linkedin.com/jobs/view/junior-softw...
4,Data Processing Specialist - Python Developer\...,FortNine,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-process...
5,"Data Engineer\nTek Tron IT\nMontreal, Quebec, ...",Tek Tron IT,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-enginee...
6,"Data Developer\nUbisoft\nMontreal, Quebec, Can...",Ubisoft,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-develop...
7,Data Science Engineer - remote\nHour Consultin...,Hour Consulting,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/data-science...
8,Python Big Data Developer - FinTech - $200k CA...,Hunter Bond,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/python-big-d...
9,Database Engineer- Canada\nZortech Solutions\n...,Zortech Solutions,"Montreal, Quebec, Canada",https://ca.linkedin.com/jobs/view/database-eng...


In [30]:
def scrape_linkedin_job_pages(jobs_df):
    # Set up Chrome options for headless mode
    chrome_options = Options()
    # chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    
    # Set up the driver (make sure chromedriver is in your PATH)
    driver = webdriver.Chrome(options=chrome_options)

    for url in jobs_df['Link']:
        # Navigate to LinkedIn jobs search page
        linkedin_url = url
        driver.get(linkedin_url)
        content = driver.find_element(By.CLASS_NAME,'show-more-less-html__markup').text
        time.sleep(2)  # Wait for the page to load
        jobs_df[jobs_df['Link'] == url]['Content'] = content
        

In [31]:
scrape_linkedin_job_pages(jobs_df)