In [1]:
import os
import time
import warnings
from datetime import datetime

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# Suppress every DeprecationWarning for the rest of the session.
# NOTE(review): the filter is global, so deprecation notices raised by any
# library used in this notebook are hidden as well.
warnings.simplefilter('ignore', category=DeprecationWarning)

# **Selenium**

**Selenium is a package that automates web browsing. The WebDriver tool from Selenium enables us to access and navigate webpages. Functions such as scraping data from a site can be automated using the tool.**

<sup>Source: [Selenium Documentation](https://www.selenium.dev/documentation/)</sup>

## **Web Development Terms**

**CSS Selectors are the mechanism by which CSS rules are matched to elements.**

**In HTML, the `div` tag defines a division or section of a page that groups other elements together. In our case, each hospital sits in its own `div` card: inside that card, an `h3` element holds the name of the hospital and a `b` element holds the ER wait time.**

<sup>Source: [CSS Master](https://books.google.com/books/about/CSS_Master.html?id=GBk-EAAAQBAJ) by Tiffany Brown</sup>

<sup>Source: [The Content Division element](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/div) from Mozilla.org</sup>

In [2]:
# Work from the current user's Downloads folder instead of a path hard-coded
# to one machine; os.path.expanduser('~') resolves to the home directory of
# whoever runs the notebook.
# NOTE(review): presumably the chromedriver executable is kept in this
# folder, since the browser is later started with a bare 'chromedriver' path - confirm.
os.chdir(os.path.join(os.path.expanduser('~'), 'Downloads'))

In [3]:
# Display the current working directory to confirm where the notebook is running from
os.getcwd()

'C:\\Users\\Adrian\\Downloads'

In [4]:
# Launch settings for Chrome: the only flag set is headless mode
headless_flag = '--headless'

chrome_options = Options()
chrome_options.add_argument(headless_flag)

In [5]:
# Start a Chrome session through the 'chromedriver' executable, applying the
# launch settings held in chrome_options.
# The options are passed with `options=` because the `chrome_options=` keyword
# is deprecated.
# NOTE(review): Selenium 4 also deprecates passing the driver path positionally
# in favour of a Service object - confirm the installed version before changing it.
driver = webdriver.Chrome('chromedriver', options=chrome_options)

In [6]:
# URL of the Virtua ER wait-times page that will be scraped
url = 'https://www.virtua.org/patient-tools/er-wait-times'

# Load the page in the browser session so its elements can be queried
driver.get(url)

# The hospital cards occupy child positions 2 through 10 of their parent.
# Both selectors share the same path down to the card's inner div and differ
# only in the final element they pick out.
card_positions = range(2, 11)
card_body_template = 'div.card:nth-child({}) > div:nth-child(1) > div:nth-child(2)'

# CSS selector path for hospital names (the h3 element in each card)
name_path_list = [
    card_body_template.format(position) + ' > h3:nth-child(1)'
    for position in card_positions
]

# CSS selector path for ER wait times (the b element in each card)
er_time_path_list = [
    card_body_template.format(position) + ' > b:nth-child(2)'
    for position in card_positions
]

In [7]:
# Print each hospital name next to its current ER wait time.
# Elements are located with find_element(By.CSS_SELECTOR, ...) because the
# find_element_by_css_selector shortcut is deprecated in Selenium 4.
for name, er_time in zip(name_path_list, er_time_path_list):
    print(driver.find_element(By.CSS_SELECTOR, name).text,
          driver.find_element(By.CSS_SELECTOR, er_time).text)

Berlin Emergency Department 2 mins
Camden Emergency Department 28 mins
Virtua Marlton Hospital 7 mins
Virtua Memorial Hospital 45 mins
Virtua Memorial Hospital Pediatric Pavilion 2+ hrs
Virtua Our Lady Of Lourdes Hospital 18 mins
Virtua Voorhees Hospital 16 mins
Virtua Voorhees Hospital Pediatric ER 12 mins
Virtua Willingboro Hospital 19 mins


In [8]:
er = []
wait_time = []
curr_time = []

# scrape settings
# number of page loads to record; set to None to keep scraping until the
# cell is stopped manually
n_scrapes = 3
# time between scrapes in seconds. 15 mins = 900
seconds_between_scrapes = 900

# create a bot to scrape the data
# each pass reloads the page and appends one row per hospital to the lists
scrapes_done = 0
while True:

    driver.refresh()

    # date/time that the data was scraped; taken once per page load so every
    # row from the same load carries the same timestamp
    scraped_at = datetime.now().strftime('%m/%d/%Y %H:%M')

    for name, er_time in zip(name_path_list, er_time_path_list):
        # find_element(By.CSS_SELECTOR, ...) replaces the deprecated
        # find_element_by_css_selector shortcut
        er.append(driver.find_element(By.CSS_SELECTOR, name).text)
        wait_time.append(driver.find_element(By.CSS_SELECTOR, er_time).text)
        curr_time.append(scraped_at)

    scrapes_done += 1

    # stop right after the final scrape instead of sleeping one more interval
    if n_scrapes is not None and scrapes_done >= n_scrapes:
        break

    time.sleep(seconds_between_scrapes)

In [9]:
# Combine the three parallel lists into one table: one row per hospital per scrape
scraped_columns = {
    'Hospital Name': er,
    'Wait Time': wait_time,
    'Date/Time Scraped': curr_time,
}
df = pd.DataFrame(scraped_columns)

In [10]:
# Show the rows grouped by hospital and ordered by scrape time.
# The scrape time is stored as 'MM/DD/YYYY HH:MM' text, which does not sort
# chronologically across year boundaries, so the sort key compares that column
# as real datetimes. Only the ordering is affected; the displayed values stay
# the original strings.
df.sort_values(
    ['Hospital Name', 'Date/Time Scraped'],
    key=lambda col: (
        pd.to_datetime(col, format='%m/%d/%Y %H:%M')
        if col.name == 'Date/Time Scraped'
        else col
    ),
)

Unnamed: 0,Hospital Name,Wait Time,Date/Time Scraped
0,Berlin Emergency Department,3 mins,01/19/2022 11:31
9,Berlin Emergency Department,4 mins,01/19/2022 11:46
18,Berlin Emergency Department,4 mins,01/19/2022 12:02
1,Camden Emergency Department,28 mins,01/19/2022 11:31
10,Camden Emergency Department,27 mins,01/19/2022 11:46
19,Camden Emergency Department,27 mins,01/19/2022 12:02
2,Virtua Marlton Hospital,8 mins,01/19/2022 11:31
11,Virtua Marlton Hospital,7 mins,01/19/2022 11:46
20,Virtua Marlton Hospital,7 mins,01/19/2022 12:02
3,Virtua Memorial Hospital,39 mins,01/19/2022 11:31


# **References and Additional Learning**

## **Data**

- **[ER Wait Times](https://www.virtua.org/patient-tools/er-wait-times) from Virtua.org**

## **Textbook**

- **[CSS Master](https://books.google.com/books/about/CSS_Master.html?id=GBk-EAAAQBAJ) by Tiffany Brown**

## **Websites**

- **[CSS selectors](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors) from Mozilla.org**

- **[Firefox and Chromium](https://madaidans-insecurities.github.io/firefox-chromium.html) and Sandboxing by madaidan**

- **[Selenium Documentation](https://www.selenium.dev/documentation/)**

- **[The Content Division element](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/div) from Mozilla.org**

# **Connect**
- **Feel free to connect with Adrian on [YouTube](https://www.youtube.com/channel/UCPuDxI3xb_ryUUMfkm0jsRA), [LinkedIn](https://www.linkedin.com/in/adrian-dolinay-frm-96a289106/), [Twitter](https://twitter.com/DolinayG) and [GitHub](https://github.com/ad17171717). Happy coding!**