## <span style="color:white">Selenium Scraping Code</span>

In [37]:
def generate_date_ranges(from_year=2006, from_mo=5, to_year=2018, up_to_mo=10):
    '''
    Returns a list of consecutive months formatted as 'YYYY-MM' strings.

    The list starts at (from_year, from_mo), inclusive, and stops just before
    (to_year, up_to_mo); the end month itself is NOT part of the result.

    Parameters:
        from_year, from_mo: first month to emit (from_mo must be 1-12).
        to_year, up_to_mo:  exclusive end month (up_to_mo must be 1-12).

    Returns:
        list of str, one entry per month; empty when the start month is at or
        after the end month.

    Raises:
        ValueError: if from_mo or up_to_mo is outside 1-12.
    '''
    # An out-of-range month can never be reached by the 1..12 roll-over below,
    # so reject it up front instead of looping forever.
    for arg_name, month in (('from_mo', from_mo), ('up_to_mo', up_to_mo)):
        if not 1 <= month <= 12:
            raise ValueError('{} must be between 1 and 12, got {!r}'.format(arg_name, month))

    cur_year, cur_mo = from_year, from_mo
    dates_list = []

    # Ordered tuple comparison (rather than equality on concatenated strings)
    # guarantees termination even when the start lies past the end.
    while (cur_year, cur_mo) < (to_year, up_to_mo):
        dates_list.append('{}-{:02d}'.format(cur_year, cur_mo))
        if cur_mo == 12:
            # December rolls over to January of the following year.
            cur_year += 1
            cur_mo = 1
        else:
            cur_mo += 1
    return dates_list
            
# Build the month list with the function defaults: 2006-05 up to, but not including, 2018-10.
dates_list = generate_date_ranges()

### Scraping code for stations available in the NOAA website's main select menu

In [36]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

# Locator constants for find_element(s); the find_element_by_* helpers used
# previously were removed in Selenium 4.3.
from selenium.webdriver.common.by import By

# Column names for one row of the results table (one row = len(weather_cols) cells).
weather_cols = ['Date', 'TMax', 'TMin', 'TAvg', 'TDep', 'HDD', 'CDD', 'Precip', 'NewSnow', 'SnowDepth']
# Visible option texts in the page's "station" <select>; one CSV is written per entry.
stations_list = ['San Francisco City, CA', 'Sonoma Cnty AP, CA', 'San Jose, CA', 'Santa Cruz, CA', 'Livermore, CA', 'Monterey WFO, CA']
dates_list = generate_date_ranges()
# Safety cap on how many table cells are consumed per month (same guard value as before).
max_cells_per_month = 500

driver = webdriver.Firefox()
try:
    driver.get("https://nowdata.rcc-acis.org/mtr/")

    for station in stations_list:
        weather_rows = []
        for cur_date in dates_list:
            # Elements are looked up again on every pass rather than cached.
            station_select = Select(driver.find_element(By.NAME, "station"))
            station_select.select_by_visible_text(station)
            date_input = driver.find_element(By.ID, "tDatepicker")
            date_input.clear()
            date_input.send_keys(cur_date)
            date_submit = driver.find_element(By.ID, "go")
            date_submit.send_keys(Keys.RETURN)
            # Fixed pause; presumably gives the results table time to load.
            time.sleep(5)

            results_body = driver.find_element(By.ID, "results_area").find_element(By.TAG_NAME, "tbody")
            result_list = [cell.text for cell in results_body.find_elements(By.TAG_NAME, "td")]

            # Slice the flat list of cell texts into rows of len(weather_cols) cells.
            n_cols = len(weather_cols)
            for start in range(0, len(result_list), n_cols):
                weather_rows.append(result_list[start:start + n_cols])
                if start + n_cols > max_cells_per_month:
                    print('breaking')
                    break

            # NOTE(review): looks like this dismisses the results dialog before the
            # next query - confirm against the page.
            close_button = driver.find_element(By.CLASS_NAME, "ui-dialog-titlebar-close")
            close_button.send_keys(Keys.RETURN)

        weather_df = pd.DataFrame(weather_rows, columns=weather_cols)
        weather_df.to_csv(station + '.csv', index=False)
finally:
    # Always end the browser session, even when a lookup or page load fails mid-run.
    driver.quit()

### Scraping code for stations requiring pushpin-on-image-map clicks

In [38]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

# Locator constants for find_element(s); the find_element_by_* helpers used
# previously were removed in Selenium 4.3.
from selenium.webdriver.common.by import By

# Column names for one row of the results table (one row = len(weather_cols) cells).
weather_cols = ['Date', 'TMax', 'TMin', 'TAvg', 'TDep', 'HDD', 'CDD', 'Precip', 'NewSnow', 'SnowDepth']
dates_list = generate_date_ranges()
### adjusting dates to account for months with missing data on website
# With the default range, index 20 is '2008-01' and index 75 is '2012-08'; each is
# overwritten with the preceding month, so 2007-12 and 2012-07 are requested twice.
dates_list[20] = '2007-12'
dates_list[75] = '2012-07'
# Safety cap on how many table cells are consumed per month (same guard value as before).
max_cells_per_month = 500

driver = webdriver.Firefox()
try:
    driver.get("https://nowdata.rcc-acis.org/mtr/")

    view_map = driver.find_element(By.ID, "viewMap")
    view_map.send_keys(Keys.RETURN)
    # Fixed pause; presumably lets the map finish rendering.
    time.sleep(5)

    show_stations = driver.find_element(By.ID, "showStations")
    show_stations.send_keys(Keys.RETURN)
    time.sleep(5)

    ### selecting pushpin by z-index declaration (for Berkeley)
    # The third element whose inline style contains 'z-index: 116' is used.
    pushpin = driver.find_elements(By.CSS_SELECTOR, "[style*='z-index: 116']")
    pushpin[2].send_keys(Keys.RETURN)

    weather_rows = []
    for cur_date in dates_list:
        date_input = driver.find_element(By.ID, "tDatepicker")
        date_input.clear()
        date_input.send_keys(cur_date)
        date_submit = driver.find_element(By.ID, "go")
        date_submit.send_keys(Keys.RETURN)
        # Fixed pause; presumably gives the results table time to load.
        time.sleep(5)

        results_body = driver.find_element(By.ID, "results_area").find_element(By.TAG_NAME, "tbody")
        result_list = [cell.text for cell in results_body.find_elements(By.TAG_NAME, "td")]

        # Slice the flat list of cell texts into rows of len(weather_cols) cells.
        n_cols = len(weather_cols)
        for start in range(0, len(result_list), n_cols):
            weather_rows.append(result_list[start:start + n_cols])
            if start + n_cols > max_cells_per_month:
                print('breaking')
                break

        # NOTE(review): looks like this dismisses the results dialog before the
        # next query - confirm against the page.
        close_button = driver.find_element(By.CLASS_NAME, "ui-dialog-titlebar-close")
        close_button.send_keys(Keys.RETURN)

    weather_df = pd.DataFrame(weather_rows, columns=weather_cols)
    weather_df.to_csv('w_Berkeley.csv', index=False)
finally:
    # Always end the browser session, even when a lookup or page load fails mid-run.
    driver.quit()

NoSuchWindowException: Message: Browsing context has been discarded
