In [2]:
# Import essential libraries
from bs4 import BeautifulSoup
import requests
import time
import csv

In [3]:
# Eight beaches near Gainesville: one dict per beach, holding its display name
# and the URL of its hour-by-hour forecast page on weather.com
beach_list = [
    {
        'name': 'Cedar Key',
        'url': 'https://weather.com/weather/hourbyhour/l/Cedar+Key+FL?canonicalCityId=bb28a62d2243110b4fb73bc365ff5055a642b46474198b2d0d95907489f821f9',
    },
    {
        'name': 'Crescent Beach',
        'url': 'https://weather.com/weather/hourbyhour/l/Crescent+Beach+FL+USFL0994:1:US',
    },
    {
        'name': 'Fernandina Beach',
        'url': 'https://weather.com/weather/hourbyhour/l/Fernandina+Beach+FL?canonicalCityId=e2b080b1185d1ed5dddf546006ca29ba4c56243763a9fe5bc3ba12868343bad9',
    },
    {
        'name': 'Flagler Beach',
        'url': 'https://weather.com/weather/hourbyhour/l/Flagler+Beach+FL?canonicalCityId=257a6dc20177db56e73faaf6bf1c6a984bdeec76761d81764e944aaa85b25ca1',
    },
    {
        'name': 'Jacksonville Beach',
        'url': 'https://weather.com/weather/hourbyhour/l/Jacksonville+Beach+FL?canonicalCityId=8cd17a4ef12fc87328ae54dd7a3adb84f37d3ef7b70c791800e070c8261f5f6c',
    },
    {
        'name': 'Neptune Beach',
        'url': 'https://weather.com/weather/hourbyhour/l/Neptune+Beach+FL?canonicalCityId=41fd51fa06c503f9abf98d97876d9b6618e773c69d8b4c8c0160b52b0abb696b',
    },
    {
        'name': 'St. Augustine Beach',
        'url': 'https://weather.com/weather/hourbyhour/l/587a164e718da1d014e46176e4dceae6c76525f26ed40c4a79fa14deb1a8d33f',
    },
    {
        'name': 'St. Pete Beach',
        'url': 'https://weather.com/weather/hourbyhour/l/c929d484ba3a462ae3f93dcd8040ca1e28a85e81d2c75da93da25469d38f4367',
    },
]

# For a quick trial run against a single page, uncomment the line below
# beach_list = [{'name':'Cedar Key', 'url':'https://weather.com/weather/hourbyhour/l/Cedar+Key+FL?canonicalCityId=bb28a62d2243110b4fb73bc365ff5055a642b46474198b2d0d95907489f821f9'}]

# Accumulator for the scraped rows: one dict per beach per forecast hour
hourly_weather_data = []

# Create a function for scraping the beach weather URLs
def scrape_hourly_weather(list_name, delay=3, timeout=30):
    """Scrape the next day's hourly forecast (5am to 10pm) for each beach.

    Each beach's hour-by-hour page is downloaded and parsed, and one dict per
    forecast hour is appended to the notebook-level ``hourly_weather_data``
    list with the keys 'name', 'hour', 'temp', 'cloudiness', 'wind' and
    'rain_chance'. Nothing is returned.

    Args:
        list_name: Iterable of dicts, each with a 'name' and a 'url' key.
        delay: Seconds to pause after each page so the server is not flooded
            with requests. Defaults to 3.
        timeout: Seconds to wait for the server before giving up on a
            request. Defaults to 30.

    Raises:
        requests.HTTPError: If a page responds with an HTTP error status.
        ValueError: If a page does not contain the expected forecast section
            or a second date header.
    """
    # Loop through each beach to scrape the hourly weather page for each beach
    for beach in list_name:
        url = beach['url']

        # A timeout keeps one unresponsive request from hanging the whole run
        page = requests.get(url, timeout=timeout)

        # Fail on 4xx/5xx responses instead of parsing an error page as a forecast
        page.raise_for_status()
        soup = BeautifulSoup(page.text, 'html.parser')

        # Parent element containing every row of weather data
        parent = soup.find('section', {'data-testid': 'HourlyForecast'})
        if parent is None:
            raise ValueError('No hourly forecast section found for ' + str(beach['name']))

        # Header elements which display the day/date
        # NOTE(review): these class names look build-generated and may change
        # when the site is redeployed - confirm the selectors if this starts failing
        headers = parent.find_all('h2', {'class': 'HourlyForecast--longDate--J_Pdh'})
        if len(headers) < 2:
            raise ValueError('No second date header found for ' + str(beach['name']))

        # Find all rows of weather data after the second header, meaning every row (hour) of weather data for the following day
        weather_rows = headers[1].find_all_next('summary', attrs={'class': 'Disclosure--Summary--3GiL4'})

        # For each beach's weather page, loop through select rows of weather data to get the hourly weather for the following day, from 5am to 10pm
        for row in weather_rows[5:23]:
            hour = row.find('h3', attrs={'data-testid': 'daypartName'}).text.strip()

            # Keep everything before the degree sign, so three-digit and
            # one-digit temperatures survive intact (a fixed two-character
            # slice turned "100°" into "10" and left "9°" with its symbol)
            temp_text = row.find('span', attrs={'data-testid': 'TemperatureValue'}).text.strip()
            temp = temp_text.partition('°')[0].strip()

            cloudiness = row.find('span', attrs={'class': 'DetailsSummary--extendedData--307Ax'}).text.strip()
            wind = row.find('span', attrs={'data-testid': 'Wind'}).text.strip()
            rain_chance = row.find('span', attrs={'data-testid': 'PercentageValue'}).text.strip()

            # Append hourly weather data to a list, which will later be used to write the CSV
            hourly_weather_data.append({'name':beach['name'], 'hour':hour, 'temp':temp, 'cloudiness':cloudiness, 'wind':wind, 'rain_chance':rain_chance})

        # Indicate when each beach's weather page has been scraped
        print('Finished scraping ' + str(beach['name']))

        # Add a short delay between each weather page, to avoid possibly overwhelming the server with requests and getting blocked
        time.sleep(delay)
        
# Run the scraper over every beach in beach_list; the results accumulate in hourly_weather_data
scrape_hourly_weather(list_name=beach_list)

Finished scraping Cedar Key
Finished scraping Crescent Beach
Finished scraping Fernandina Beach
Finished scraping Flagler Beach
Finished scraping Jacksonville Beach
Finished scraping Neptune Beach
Finished scraping St. Augustine Beach
Finished scraping St. Pete Beach


In [4]:
# Create a function for writing the hourly beach weather data to a CSV
def write_csv(data_list, filename):
    """Write the hourly beach weather records to a CSV file.

    A header row is written first, followed by one row per record in the
    order the records appear in ``data_list``.

    Args:
        data_list: Iterable of dicts, each with the keys 'name', 'hour',
            'temp', 'cloudiness', 'wind' and 'rain_chance'.
        filename: Path of the CSV file to create; an existing file is
            overwritten.

    Raises:
        KeyError: If a record is missing one of the expected keys.
    """
    # Record keys, in the same order as the column headings written below
    column_keys = ['name', 'hour', 'temp', 'cloudiness', 'wind', 'rain_chance']

    # newline='' hands line-ending control to the csv module (otherwise Windows
    # output gets a blank line after every row); the with-block closes and
    # saves the file even if writing a row raises
    with open(filename, 'w', newline='') as myfile:
        # Make a Python CSV writer object
        writer = csv.writer(myfile)

        # Write the column headings row
        writer.writerow(['Name', 'Time (Hour)', 'Temperature', 'Cloudiness', 'Wind', 'Chance of Rain'])

        # Write one row of weather data for each hour in the list
        for weather_hour in data_list:
            writer.writerow([weather_hour[key] for key in column_keys])

    print('Finished writing the CSV!')
    
# Save every scraped row to hourly_beach_weather.csv
write_csv(data_list=hourly_weather_data, filename='hourly_beach_weather.csv')

Finished writing the CSV!
