In [15]:
import time
from urllib.parse import quote_plus

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    ElementClickInterceptedException,
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


In [23]:
def _text_or_default(driver, class_name, default):
    """Return the text of the first element with `class_name`, or `default` if none exists."""
    try:
        return driver.find_element(By.CLASS_NAME, class_name).text
    except NoSuchElementException:
        return default


def get_jobs(keyword, num_jobs, verbose=False):
    """Scrape Glassdoor job postings (Canada search) into a pandas DataFrame.

    Args:
        keyword: Search phrase. It is URL-encoded before being placed in the
            query string, so spaces and symbols are safe.
        num_jobs: Maximum number of postings to collect. A value <= 0 returns
            an empty DataFrame without launching a browser.
        verbose: When True, print the fields of every collected posting.

    Returns:
        A DataFrame with one row per posting and the columns "Job Title",
        "Company Name", "Location", "Salary Estimate", "Rating" and
        "Job Description". An empty, column-less DataFrame is returned when
        the listings never load or nothing could be collected.

    Notes:
        The browser is always closed, even when scraping fails part-way.
        NOTE(review): the `css-*` class names used below look site-generated
        and may no longer match the live page - verify before relying on them.
    """
    if num_jobs <= 0:
        return pd.DataFrame()

    # Setup Chrome options
    options = webdriver.ChromeOptions()
    options.add_argument("--start-maximized")
    # This flag turns Chrome's popup blocker OFF, i.e. popups are allowed.
    options.add_argument("--disable-popup-blocking")

    driver = webdriver.Chrome(options=options)
    jobs = []
    try:
        driver.set_window_size(1120, 1000)

        # Glassdoor job search URL (location parameters pinned to Canada)
        url = (
            "https://www.glassdoor.com/Job/jobs.htm"
            f"?sc.keyword={quote_plus(str(keyword))}"
            "&locT=C&locId=1147401&locKeyword=Canada&jobType=all"
        )
        driver.get(url)

        # Wait for the first page of listings; give up gracefully if they never appear.
        try:
            print("Waiting for job listings to load...")
            WebDriverWait(driver, 20).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "react-job-listing"))
            )
            print("Page loaded, job listings found.")
        except Exception as e:
            # Deliberately broad: any load failure yields an empty result.
            # The exception class is printed because Selenium's timeout
            # message is empty on its own.
            print(f"Error loading job listings: {type(e).__name__}: {e}")
            return pd.DataFrame()

        while len(jobs) < num_jobs:
            print(f"Scraping page... Jobs collected so far: {len(jobs)}")

            # Try to close any popups (signup/login). WebDriverWait.until raises
            # TimeoutException when nothing becomes clickable, which is the
            # normal "no popup" case.
            try:
                WebDriverWait(driver, 3).until(
                    EC.element_to_be_clickable((By.CLASS_NAME, "selected"))
                ).click()
                print("Closed signup popup.")
            except (TimeoutException, NoSuchElementException, ElementClickInterceptedException):
                print("No signup popup found.")

            time.sleep(1)

            # find_elements returns an empty list (it never raises) when nothing matches.
            job_cards = driver.find_elements(By.CLASS_NAME, "react-job-listing")
            print(f"Found {len(job_cards)} job cards on this page.")
            if not job_cards:
                print("Could not find job cards.")
                break

            for job_card in job_cards:
                if len(jobs) >= num_jobs:
                    break

                # Open the detail pane; skip cards that are covered or were re-rendered.
                try:
                    job_card.click()
                except (ElementClickInterceptedException, StaleElementReferenceException):
                    continue
                time.sleep(2)

                try:
                    company_name = driver.find_element(By.CLASS_NAME, "css-87uc0g").text
                    location = driver.find_element(By.CLASS_NAME, "css-56kyx5").text
                    job_title = driver.find_element(By.CLASS_NAME, "css-1j389vi").text
                    job_description = driver.find_element(By.CLASS_NAME, "jobDescriptionContent").text
                except NoSuchElementException:
                    continue  # Skip if any mandatory element is missing

                # Optional fields fall back to a placeholder.
                salary_estimate = _text_or_default(driver, "css-1xe2xww", "Not Provided")
                rating = _text_or_default(driver, "css-1m5m32b", "Not Rated")

                if verbose:
                    print(f"Job Title: {job_title}")
                    print(f"Company: {company_name}")
                    print(f"Location: {location}")
                    print(f"Salary: {salary_estimate}")
                    print(f"Rating: {rating}")
                    print("------------------------------------------------------")

                jobs.append({
                    "Job Title": job_title,
                    "Company Name": company_name,
                    "Location": location,
                    "Salary Estimate": salary_estimate,
                    "Rating": rating,
                    "Job Description": job_description
                })

            # Quota reached: no need to load (and wait for) another page.
            if len(jobs) >= num_jobs:
                break

            # Move to next page; a missing, disabled or covered button ends the run.
            try:
                next_button = driver.find_element(By.CLASS_NAME, "nextButton")
                has_next = next_button.is_enabled()
                if has_next:
                    next_button.click()
            except (NoSuchElementException, ElementClickInterceptedException):
                has_next = False

            if not has_next:
                print(f"Scraping finished early. Found {len(jobs)} jobs instead of {num_jobs}.")
                break

            time.sleep(4)  # Give time for new jobs to load
    finally:
        driver.quit()  # Always release the browser, including on errors

    return pd.DataFrame(jobs)



In [24]:

# Collect up to ten "Data Scientist" postings from the Canada search, then preview the first rows
df = get_jobs(keyword="Data Scientist", num_jobs=10, verbose=True)
print(df.head(5))

Waiting for job listings to load...
Error loading job listings: Message: 

Empty DataFrame
Columns: []
Index: []
