Part I: Completing the third Selenium Tutorial (5pts)

In [12]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
import time

# Defines the Chrome driver we use to access the web.
# The path is a raw string so its backslashes are never treated as escape sequences.
PATH = Service(r"C:\Program Files (x86)\chromedriver.exe")
driver = webdriver.Chrome(service=PATH)

# Every browser interaction lives inside the try block, so a failure at any step
# (including the very first page load or click) closes the browser instead of leaking it.
try:
    # The Chrome browser opens the page https://www.techwithtim.net/
    driver.get("https://www.techwithtim.net/")

    # Chrome waits (up to 10 seconds) for the link with the text "Python Programming"
    # to become clickable, then clicks on it
    link = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.LINK_TEXT, "Python Programming"))
    )
    link.click()

    # Chrome waits for the link with the text "Beginner Python Tutorials" to become clickable,
    # then clicks on it. Waiting for "clickable" rather than mere presence in the DOM avoids
    # clicking an element that is not yet visible or enabled.
    # If we were accessing a search bar, element.clear() would clear the text input field
    element = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.LINK_TEXT, "Beginner Python Tutorials"))
    )
    element.click()

    # Chrome waits for the unique element "sow-button-19310003" to become clickable, then clicks on it
    element = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.ID, "sow-button-19310003"))
    )
    element.click()

    # Chrome goes back three pages, then goes forward two pages to land on the
    # "Beginner Python Tutorials" page
    for _ in range(3):
        driver.back()
    for _ in range(2):
        driver.forward()

# If an error is encountered at any part of the process, report it and quit the browser.
# `Exception` (rather than a bare except) lets KeyboardInterrupt / SystemExit propagate.
# On success the browser is left open.
except Exception as error:
    print(f"Tutorial navigation failed, closing the browser: {error!r}")
    driver.quit()


Part II: Scraping https://data.gov/ for climate change search results (5pts)

In [34]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
import time
import csv

# Location of the Chrome driver binary.
# The path is a raw string so its backslashes are never treated as escape sequences.
PATH = Service(r"C:\Program Files (x86)\chromedriver.exe")

# Accesses our CSV file in advance. The with-block guarantees the file is flushed and
# closed even when the browser cannot be started or the scrape fails part-way.
with open("data_gov_climate_change.csv", "w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["title", "department", "desc"])

    driver = webdriver.Chrome(service=PATH)

    try:
        driver.get("https://data.gov/")

        # Waits for the search box in the page header, then submits the query
        search = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "search-header"))
        )
        search.clear()
        search.send_keys("climate change")
        search.send_keys(Keys.RETURN)

        # Waits for the container holding the search results to load
        results_container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "dataset-list"))
        )

        results = results_container.find_elements(By.CLASS_NAME, "dataset-content")

        for result in results:
            # The first link inside a result is used as its title
            title = result.find_element(By.TAG_NAME, "a").text
            desc = result.find_element(By.CLASS_NAME, "notes")
            # Trims the spaces and dash characters surrounding the organization name
            department = desc.find_element(By.CLASS_NAME, "dataset-organization").text.strip(" —")
            desc_text = desc.find_element(By.TAG_NAME, "div").text

            csv_writer.writerow([title, department, desc_text])

    # Report the failure instead of silently swallowing it; rows written so far are kept.
    # `Exception` (rather than a bare except) lets KeyboardInterrupt / SystemExit propagate.
    except Exception as error:
        print(f"Scraping data.gov failed: {error!r}")

    # The browser is closed whether the scrape succeeded or failed
    finally:
        driver.quit()

Part III (Bonus): Scraping the Supreme Court website for Court Opinions (2pts)

Note: Wow, this was quite hard! I must have spent 2-3 hours just working out the kinks in this. I'm not sure if this was worth the extra 2 points, but it was definitely worth the experience!

In [107]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support import expected_conditions as EC
import time
import csv

# Location of the Chrome driver binary.
# The path is a raw string so its backslashes are never treated as escape sequences.
PATH = Service(r"C:\Program Files (x86)\chromedriver.exe")

# Defines the range of years (therefore pages) which we will scrape.
# Only the last two digits of each year are used to build the page URL, so both
# years are expected to fall within the same century.
starting_year = "2016"
ending_year = "2022"

# Accesses our Supreme Court Opinions csv. The with-block guarantees the file is
# flushed and closed no matter how the scrape ends.
with open("supreme_court_opinions.csv", "w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["date", "docket", "name", "volume", "link"])

    # Defines the Chrome driver we use to access the web
    driver = webdriver.Chrome(service=PATH)

    try:
        for year in range(int(starting_year[2:4]), int(ending_year[2:4]) + 1):
            try:
                # Zero-padded so single-digit years still produce a two-digit URL suffix
                driver.get(f"https://www.supremecourt.gov/opinions/slipopinion/{year:02d}")

                # Finds the container which holds all the tables
                all_cases = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.ID, "list"))
                )

                # Opens all collapsible tables by clicking on them, so their data can be read
                # Skips the first table, which is open by default
                # The leading "." keeps the XPath search inside `all_cases`; a bare "//"
                # would search the whole document even when called on an element.
                table_toggles = all_cases.find_elements(By.XPATH, ".//a[@class='accordion-toggle']")
                for toggle in table_toggles[1:]:
                    toggle.click()
                    # Short pause between clicks before moving on to the next toggle
                    time.sleep(0.3)

                # Finds all of the tables in the overarching container
                tables = all_cases.find_elements(By.CLASS_NAME, "table")

                # Breaks down the tables into rows and columns, and enters each row as data into the CSV
                for table in tables:
                    for row in table.find_elements(By.TAG_NAME, "tr"):
                        columns = row.find_elements(By.TAG_NAME, "td")

                        # If the columns are empty, it must be a header row, therefore skip
                        if not columns:
                            continue

                        # Start every row from blank values so a short row can never
                        # inherit data left over from the previous row
                        date = docket = name = volume = link = ""

                        # Depending on which column we're on, assign it to a different variable
                        # (column 0 and columns 4-5 are not used)
                        for current_column, column in enumerate(columns):
                            if current_column == 1:
                                date = column.text
                            elif current_column == 2:
                                docket = column.text
                            elif current_column == 3:
                                name = column.text
                                # find_elements returns an empty list instead of raising when
                                # the cell has no link, so one odd row cannot abort the year
                                anchors = column.find_elements(By.TAG_NAME, "a")
                                if anchors:
                                    link = anchors[0].get_attribute("href")
                            elif current_column == 6:
                                volume = column.text

                        # Writes our data into our CSV
                        csv_writer.writerow([date, docket, name, volume, link])

            # Report the failing year and carry on with the next one. The browser must stay
            # alive here: quitting it inside the loop would make every later year fail.
            # `Exception` (rather than a bare except) lets KeyboardInterrupt / SystemExit propagate.
            except Exception as error:
                print(f"Some error has occurred while scraping year {year:02d}: {error!r}")

    # The browser is closed exactly once, after all years have been processed
    finally:
        driver.quit()