## Web Scraping project: Google's Job postings

This notebook shows how to scrape Google's job postings using Python with the Beautiful Soup library and Selenium.

* 🏆 This can be a great way to scrape job data compared to Google's SERP API, which you have to pay 🤑 for beyond 100 API calls.

* 🦾 Use the data to make your job search easier.
---

The XPaths might change over time. Use an XPath Chrome extension to find the updated XPath selectors.

In [3]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from time import sleep
import urllib.parse
import itertools

def get_all_jobs(driver, role_name, company, max_listings=50):
    """Scrape the Google Jobs listings for one role/company search.

    Loads the Google Jobs search page for ``role_name`` and ``company``,
    scrolls through the results with ``scroll_page``, then clicks each
    listing and collects its details with ``_get_job``.

    Args:
        driver: Selenium WebDriver used to load and interact with the page.
        role_name: Search text for the role; also stored on every record
            under ``"role_name"``.
        company: Company name added to the search; also stored on every
            record under ``"company"``.
        max_listings: Maximum number of listings to open (default 50).
            ``None`` opens every listing found.

    Returns:
        A list of dicts, one per opened listing.
    """
    # URL-encode the search terms so spaces and reserved characters such as
    # "&" or "#" cannot break or truncate the query string.
    search_terms = urllib.parse.quote_plus(f"{role_name} {company}")
    query = f"https://www.google.com/search?q={search_terms}&ibp=htl;jobs#htivrt=jobs"
    print(query)
    driver.get(query)
    sleep(3)  # Allow time for the results page to load

    scroll_page(driver)
    sleep(2)

    # Find all listings after scrolling # .//li[@data-ved]//div[@role='treeitem']/div/div
    listings = driver.find_elements(By.XPATH, "//div[@role='treeitem']")
    jobs_data = []
    for listing in listings[:max_listings]:
        listing.click()
        sleep(1)  # Allow time for the job details to load
        job_data = _get_job(driver)
        # Tag the record with the search terms. Note that "company" replaces
        # the company value that _get_job scraped from the page.
        job_data["role_name"] = role_name
        job_data["company"] = company
        jobs_data.append(job_data)
    return jobs_data

def scroll_page(driver, max_scrolls=50):
    """Scroll through the listing elements one by one, then back to the top.

    Maximizes the window, sets a 10 second implicit wait on the driver, and
    scrolls each ``PwjeAc`` element into view in turn until no further
    element is found or ``max_scrolls`` elements have been visited. Finally
    scrolls the first element back into view.

    Args:
        driver: Selenium WebDriver showing the results page.
        max_scrolls: Maximum number of elements to scroll to (default 50).
    """
    driver.maximize_window()
    driver.implicitly_wait(10)
    found_any = False
    for i in range(max_scrolls):
        try:
            element = driver.find_element(By.XPATH, f"(//div[@class='PwjeAc'])[{i + 1}]")
        except NoSuchElementException:
            print(f"No more listings to scroll. Total listings scrolled: {i}")
            break
        found_any = True
        driver.execute_script("arguments[0].scrollIntoView(true);", element)
        sleep(1)

    # With no listings at all there is nothing to scroll back to; looking up
    # the first element here would raise NoSuchElementException.
    if not found_any:
        return
    first_element = driver.find_element(By.XPATH, "(//div[@class='PwjeAc'])[1]")
    driver.execute_script("arguments[0].scrollIntoView(true);", first_element)
    
def _get_job(driver):
    """Collect the details of the currently displayed job into a dict.

    Locates the job details container, then fills the record field by field
    using the ``_get_*`` helpers and the driver's current URL.

    Args:
        driver: Selenium WebDriver with a job listing selected.

    Returns:
        A dict with the keys ``id``, ``company``, ``role``, ``location``,
        ``posted_date``, ``description`` and ``url``.
    """
    details_panel = driver.find_element(By.XPATH, '//div[@class="whazf bD1FPe"]')

    record = {"id": _get_job_id(driver)}
    record["company"] = _get_company(driver, details_panel)
    record["role"] = _get_job_role(driver, details_panel)
    record["location"] = _get_job_location(driver, details_panel)
    record["posted_date"] = _get_job_posted(driver, details_panel)
    record["description"] = _get_job_description(driver, details_panel)
    # Read the URL last, after the description helper has run.
    record["url"] = driver.current_url
    return record


def _get_job_id(driver):
    parsed_url = urllib.parse.urlparse(driver.current_url)
    return urllib.parse.parse_qs(parsed_url.fragment)['htidocid'][0]

def _get_company(driver, job_container):
    """Return the visible text of the company name element.

    Args:
        driver: Not used by this helper.
        job_container: Element on which the XPath lookup is issued.

    Returns:
        The ``.text`` of the matched element.
    """
    # NOTE(review): an XPath starting with "//" selects from the document
    # root rather than from job_container; confirm whether ".//" was meant.
    company_xpath = '//div[@class="nJlQNd sMzDkb"]'
    company_element = job_container.find_element(By.XPATH, company_xpath)
    return company_element.text

def _get_job_role(driver, job_container):
    """Return the job title from the heading inside the ``tl_ditc`` element.

    Args:
        driver: Not used by this helper.
        job_container: Element on which the XPath lookup is issued.

    Returns:
        The ``innerText`` attribute of the matched ``h2`` element.
    """
    # Positional path anchored on the element id; sensitive to layout changes.
    title_xpath = '//*[@id="tl_ditc"]/div[1]/div[1]/div[1]/div[1]/div[1]/h2[1]'
    return job_container.find_element(By.XPATH, title_xpath).get_attribute('innerText')

def _get_job_location(driver, job_container):
    """Return the job location text from inside the ``tl_ditc`` element.

    Args:
        driver: Not used by this helper.
        job_container: Element on which the XPath lookup is issued.

    Returns:
        The ``innerText`` attribute of the matched element.
    """
    # Positional path anchored on the element id; sensitive to layout changes.
    location_xpath = '//*[@id="tl_ditc"]/div[1]/div[1]/div[1]/div[1]/div[2]/div[2]/div[2]'
    return job_container.find_element(By.XPATH, location_xpath).get_attribute('innerText')

def _get_job_posted(driver, job_container):
    """Return the "posted" text of the job, or ``None`` when it is missing.

    Args:
        driver: Not used by this helper.
        job_container: Element on which the XPath lookup is issued.

    Returns:
        The ``innerText`` attribute of the matched element, or ``None`` if
        the lookup raises ``NoSuchElementException``.
    """
    # NOTE(review): the path is purely positional, so it presumably returns
    # whatever text sits in that slot (not necessarily a date) - verify.
    posted_xpath = '//*[@id="tl_ditc"]/div[1]/div[1]/div[3]/div[1]/span[2]/span[1]'
    try:
        posted_element = job_container.find_element(By.XPATH, posted_xpath)
    except NoSuchElementException:
        return None
    return posted_element.get_attribute('innerText')

def _get_job_description(driver, job_container):
    """Return the job description text, expanding it first when possible.

    Looks for the expand control relative to ``job_container`` and clicks
    it; when the control is not found the description is read as-is.

    Args:
        driver: Not used by this helper.
        job_container: Element containing the job details.

    Returns:
        The ``.text`` of the description ``span`` inside ``job_container``.
    """
    expand_xpath = "div/div/div/div/div/div/div[@class='CdXzFe j4kHIf']"
    try:
        expand_toggle = job_container.find_element(By.XPATH, expand_xpath)
        expand_toggle.click()
    except NoSuchElementException:
        # No expand control to click: nothing to do.
        pass
    else:
        sleep(1)  # Allow time for the description to expand

    description_element = job_container.find_element(By.XPATH, ".//span[@class='HBvzbc']")
    return description_element.text


if __name__ == '__main__':
    # Scrape every role/company combination and load the rows into a DataFrame.
    driver = webdriver.Chrome()
    role_names = ["AI Jobs"]
    companies = ["Google"]
    data = []
    try:
        for role_name, company in itertools.product(role_names, companies):
            data.extend(get_all_jobs(driver, role_name, company))
    finally:
        # Always close the browser, even when scraping fails part-way,
        # so no Chrome process is left running.
        driver.quit()

    df = pd.DataFrame(data)

https://www.google.com/search?q=AI Jobs in Google+Google&ibp=htl;jobs#htivrt=jobs
No more listings to scroll. Total listings scrolled: 42


In [4]:
df

Unnamed: 0,id,company,role,location,posted_date,description,url,role_name
0,ZuEz7bylo9sxdTWoAAAAAA==,Google,"Technical Solutions Consultant, AI, Google Cloud","New York, NY",25 days ago,Note: By applying to this position you will ha...,https://www.google.com/search?q=AI%20Jobs%20in...,AI Jobs in Google
1,v5NtqqnTarKvT-lWAAAAAA==,Google,"Partner Technology Manager, Data Analytics and AI","New York, NY",206K–302K a year,Minimum qualifications:\n• 10 years of experie...,https://www.google.com/search?q=AI%20Jobs%20in...,AI Jobs in Google
2,AQNt0qR9rH5i9eJbAAAAAA==,Google,"Software Engineer, Machine Learning, Infrastru...","Sunnyvale, CA",2 days ago,Minimum qualifications:\n• Bachelor's degree o...,https://www.google.com/search?q=AI%20Jobs%20in...,AI Jobs in Google
3,53F5gS624DWwDL20AAAAAA==,Google,"AI Principles Review Specialist, Health Applic...","Atlanta, GA",1 day ago,Note: By applying to this position you will ha...,https://www.google.com/search?q=AI%20Jobs%20in...,AI Jobs in Google
4,Ut63bf0ItbZLg2Z3AAAAAA==,Google,"Staff Software Engineer, Machine Learning, Goo...","San Francisco, CA",21 days ago,Note: By applying to this position you will ha...,https://www.google.com/search?q=AI%20Jobs%20in...,AI Jobs in Google
5,fFaP5ZpP0pETPXdgAAAAAA==,Google,"Product Manager, AI/ML Infrastructure","Sunnyvale, CA",15 days ago,Minimum qualifications:\n• Bachelor's degree o...,https://www.google.com/search?q=AI%20Jobs%20in...,AI Jobs in Google
6,5VWZ55Q2KvNDlqu2AAAAAA==,Google,"Senior Staff Software Engineer, Google Cloud G...","New York, NY (+5 others)",14 days ago,"Minimum qualifications:\n• Bachelor's degree, ...",https://www.google.com/search?q=AI%20Jobs%20in...,AI Jobs in Google
7,EslB-HzOHSUr7oH9AAAAAA==,Google,"Field Solutions Developer I, Generative AI, Go...","Washington, DC",22 days ago,The application window will be open until at l...,https://www.google.com/search?q=AI%20Jobs%20in...,AI Jobs in Google
8,BUmfM9KqWKOV2F5mAAAAAA==,Google,"AI Engineer, Google Cloud Consulting","Chicago, IL",7 days ago,Minimum qualifications:\n• Bachelor's degree i...,https://www.google.com/search?q=AI%20Jobs%20in...,AI Jobs in Google
9,d1ZNV4L3Ai7t5bP9AAAAAA==,Google,"Staff Software Engineer, Google Cloud Generati...","Cambridge, MA",4 days ago,The application window will be open until at l...,https://www.google.com/search?q=AI%20Jobs%20in...,AI Jobs in Google



**References:**

- Adapted from Scraping by AIcore 
- Adapted from Google Serp API