The following mandatory tasks have been completed for Lab 6:
* Using an action chain – click the “Government” tab and click “Departments” in the top-left corner – use Xpaths to do this (4pts)
* In a try/finally clause & WebDriverWait, click on the “Personnel/Civil Service” link, then click on the “Employment” link in the left menu – use Xpaths to do this (3pts)
* Scrape the table of Job Openings – you can use Selenium or BeautifulSoup to do this (3pts)
* Put that data into a CSV file & read it with pandas (don’t forget to close the CSV file after you create it or it may not read in pandas!) 


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import csv
import pandas

# Lab 6 (mandatory part): navigate binghamton-ny.gov with Selenium, scrape the
# "Job Openings" table with BeautifulSoup, write it to a CSV and read it back
# with pandas.

# Raw string so the backslashes in the Windows path can never be read as
# escape sequences (the non-raw form triggers an invalid-escape warning).
PATH = Service(r"C:\Program Files (x86)\chromedriver.exe")
driver = webdriver.Chrome(service=PATH)

JOB_TABLE_CLASS = "listtable responsive-table-data-mb"
CSV_PATH = "binggov_job_openings.csv"

# Everything that touches the browser lives inside try/finally so the Chrome
# process is shut down even when the very first page load or wait fails.
try:
    driver.get("https://www.binghamton-ny.gov/home")

    # Gets the locations of government_menu and departments_heading
    government_menu = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//li[@id='dropdownrootitem3']//a[@class='dropdownrootitem3']")))
    departments_heading = driver.find_element(By.XPATH, "//li[@id='dropdownrootitem3']//a[@title='Departments']")

    # Action chain to move to government_menu, then click on departments_heading.
    # A fresh chain is built for every interaction: some Selenium releases keep
    # previously queued actions after perform(), which would replay old steps.
    ActionChains(driver).move_to_element(government_menu).click(departments_heading).perform()

    # Waits for, then uses an action chain to click on the personnel_link
    personnel_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[@href='https://www.binghamton-ny.gov/government/departments/personnel-civil-service']")))
    ActionChains(driver).click(personnel_link).perform()

    # Waits for, then uses an action chain to click on the employment_link
    employment_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[@href='https://www.binghamton-ny.gov/government/departments/personnel-civil-service/employment']")))
    ActionChains(driver).click(employment_link).perform()

    # The click only starts the navigation; wait until the table rows exist
    # before grabbing the page source, otherwise the table lookup may find nothing.
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, f"//*[@class='{JOB_TABLE_CLASS}']//tr")))

    # Loads the page source into BeautifulSoup (explicit parser => same result
    # on every machine and no "no parser was explicitly specified" warning)
    soup = BeautifulSoup(driver.page_source, "html.parser")
    table = soup.find(class_=JOB_TABLE_CLASS)
    # Fall back to the table itself if the markup has no explicit <tbody>
    table_body = table.tbody or table
    table_rows = table_body.find_all("tr")

    # Writes each row of the table into our CSV; the context manager closes
    # (and flushes) the file before pandas reads it, even if a row raises.
    with open(CSV_PATH, "w", newline="", encoding="utf-8") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["job", "type", "deadline", "salary"])
        for row in table_rows:
            table_entries = row.find_all("td")
            # Skip header/spacer rows that do not carry the four data cells
            if len(table_entries) < 4:
                continue
            job_title, job_type, deadline, salary = (entry.text for entry in table_entries[:4])
            csv_writer.writerow([job_title, job_type, deadline, salary])

    # Reads our newly written CSV with pandas, then prints
    job_openings = pandas.read_csv(CSV_PATH)
    print(job_openings.to_string())

finally:
    driver.quit()

The following bonus task has been completed for Lab 6:
* Bonus (3pts) – build a program that can click on a job hyperlink (like City Engineer) and scrape all the data on the page from “Distinguishing Features” to “Minimum Qualifications” and then go back to the job opening page

In [62]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import csv
import pandas

# Lab 6 (bonus part): open every job in the "Job Openings" table, scrape the
# paragraph that starts with the "DISTINGUISHING FEATURES" heading, go back to
# the listing, and store job title + description in a CSV read back by pandas.
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)

# Raw string so the backslashes in the Windows path can never be read as
# escape sequences (the non-raw form triggers an invalid-escape warning).
PATH = Service(r"C:\Program Files (x86)\chromedriver.exe")
driver = webdriver.Chrome(service=PATH)

# Single definition of the locator for the "Job" cells of the listing table
JOB_CELL_XPATH = "//table[@class='listtable responsive-table-data-mb']//tbody//tr//td[@data-th='Job']"
CSV_PATH = "binggov_job_descriptions.csv"
# Number of leading characters dropped from the scraped paragraph.
# NOTE(review): 37 is the length of "DISTINGUISHING FEATURES OF THE CLASS:",
# so this presumably removes that heading - confirm against the live page.
HEADING_PREFIX_LEN = 37

# Everything that touches the browser lives inside try/finally so the Chrome
# process is shut down even when the very first page load or wait fails.
try:
    driver.get("https://www.binghamton-ny.gov/home")

    # Gets the locations of government_menu and departments_heading
    government_menu = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//li[@id='dropdownrootitem3']//a[@class='dropdownrootitem3']")))
    departments_heading = driver.find_element(By.XPATH, "//li[@id='dropdownrootitem3']//a[@title='Departments']")

    # Action chain to move to government_menu, then click on departments_heading.
    # A fresh chain is built for every interaction: some Selenium releases keep
    # previously queued actions after perform(), which would replay old steps.
    ActionChains(driver).move_to_element(government_menu).click(departments_heading).perform()

    # Waits for, then uses an action chain to click on the personnel_link
    personnel_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[@href='https://www.binghamton-ny.gov/government/departments/personnel-civil-service']")))
    ActionChains(driver).click(personnel_link).perform()

    # Waits for, then uses an action chain to click on the employment_link
    employment_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[@href='https://www.binghamton-ny.gov/government/departments/personnel-civil-service/employment']")))
    ActionChains(driver).click(employment_link).perform()

    #----- Bonus task -----#
    # Wait for the listing before counting; counting right after the click
    # could see the old page and yield zero jobs.
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, JOB_CELL_XPATH)))

    # Defines the number of jobs we have to cycle through
    num_jobs = len(driver.find_elements(By.XPATH, JOB_CELL_XPATH))

    # The context manager closes (and flushes) the CSV before pandas reads it,
    # even if one of the iterations raises.
    with open(CSV_PATH, "w", newline="", encoding="utf-8") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["job", "distinguishing features"])

        for job_index in range(num_jobs):
            # Waits for the table to load in (needed again after every driver.back())
            WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, JOB_CELL_XPATH)))

            # Refreshes the list of jobs every time we return to the page
            # (otherwise we get a "stale element" exception)
            job_entry = driver.find_elements(By.XPATH, JOB_CELL_XPATH)[job_index]
            job_title = job_entry.text
            job_entry.click()

            try:
                # Waits for the job description heading to be present
                distinguishing_features = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, "//div[@class='detail-content']//strong[contains(text(),'DISTINGUISHING FEATURES')]")))
                # The heading's parent element holds the whole paragraph
                job_desc = distinguishing_features.find_element(By.XPATH, "..").text[HEADING_PREFIX_LEN:]

            # If the page is badly formatted (i.e. inconsistent with the format
            # of other pages), skip. Only Selenium lookup failures are treated
            # this way so Ctrl-C and genuine bugs are no longer swallowed.
            except (TimeoutException, NoSuchElementException, StaleElementReferenceException):
                job_desc = "NO INFORMATION"

            # Writes the data to the csv, then goes back to the previous page to restart the process
            csv_writer.writerow([job_title, job_desc])
            driver.back()

    job_descriptions = pandas.read_csv(CSV_PATH)
    print(job_descriptions.to_string())

finally:
    driver.quit()

                                                                 job                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          distinguishing features
0                                             Animal Control Officer                                                                                              