In [3]:
# Import necessary libraries
from selenium import webdriver  # Used to interact with the webpage
from bs4 import BeautifulSoup  # Used to parse HTML and extract data
import pandas as pd  # Used to store and manipulate data
from datetime import datetime  # Used to get the current date and time
import pytz  # Used to convert the current time to Eastern Standard Time

# Set up the Chrome webdriver
# This opens a new browser window that the script can control
driver = webdriver.Chrome()

# URL of the webpage to scrape
url = "https://weather.gc.ca/index_e.html?layers=alert,#alerttable"

# Load the page and parse its HTML into a BeautifulSoup object that can be
# searched for specific elements.
# If loading or parsing fails, close the browser before re-raising so a failed
# run does not leave a Chrome window (and its driver process) behind. On
# success the driver is left open; it is closed further down in the script.
try:
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
except BaseException:
    # Cleanup-and-re-raise only: the original error is never swallowed.
    driver.quit()
    raise

# Column accumulators: one list per DataFrame column, filled row by row
# while the scraped alerts are processed.
provinces, cities = [], []
warnings, statements = [], []
months, days, dates, times = [], [], [], []

# Only rows whose province text appears in this list are kept.
province_list = [
    "Northwest Territories",
    "Nunavut",
    "Ontario",
    "Yukon",
    "British Columbia",
    "Quebec",
    "Nova Scotia",
    "Manitoba",
    "Alberta",
]

# An alert whose text contains any of these phrases is recorded as a warning;
# any other alert is recorded as a statement.
warning_list = [
    "Arctic outflow",
    "Blizzard",
    "Blowing snow",
    "Dust storm",
    "Extreme cold",
    "Flash freeze",
    "Fog",
    "Freezing drizzle",
    "Freezing rain",
    "Frost",
    "Heat",
    "Hurricane",
    "Rainfall",
    "Severe thunderstorm",
    "Snowfall",
    "Snow squall",
    "Storm surge",
    "Tornado",
    "Tropical storm",
    "Tsunami",
    "Weather",
    "Wind",
    "Winter storm",
]

# Collect the three kinds of elements that are later combined into rows.
# find_all returns every tag in the parsed page that matches the given tag
# name and attribute filters.
# NOTE(review): 'data-v-b8dde33c' looks like a build-generated attribute;
# confirm it stays stable when the site is redeployed.
province_elements = soup.find_all('b', attrs={'data-v-b8dde33c': ''})
city_elements = soup.find_all('a', title='Click to see on map')
alert_elements = soup.find_all(
    'a',
    title='Click to see alert information',
    class_='ga-map-table-alert-link',
)

# Take a single timestamp for the whole scrape so every row carries the same
# Month/Day/Date/Time values (previously this was recomputed for every row).
# 'US/Eastern' is DST-aware Eastern Time (EST or EDT depending on the date),
# not fixed Eastern Standard Time.
eastern = pytz.timezone('US/Eastern')
now = datetime.now(eastern)
month = now.strftime('%B')        # full month name, e.g. "January"
day = now.strftime('%A')          # full weekday name, e.g. "Monday"
date = now.strftime('%Y-%m-%d')
time = now.strftime('%H:%M:%S')

# Build one row per (province, city, alert) triple.
# zip() yields nothing when any of the three result lists is empty, so no
# separate emptiness check is needed.
# NOTE(review): zip() pairs the i-th province element with the i-th city and
# i-th alert element purely by position and stops at the shortest list. The
# sample output below the script shows e.g. "Howe Sound" listed under
# "Northwest Territories", so the three lists are evidently not aligned
# one-to-one. Pairing should follow the table structure of the page instead;
# confirm against the live DOM before relying on the Province column.
for province, city, alert in zip(province_elements, city_elements, alert_elements):
    # Skip triples whose province text is not in the list of provinces
    if province.text not in province_list:
        continue

    provinces.append(province.text)
    cities.append(city.text)

    # Classify the alert: a case-sensitive substring match against
    # warning_list marks it as a warning, anything else as a statement.
    # Exactly one of the two columns is filled; the other gets None.
    alert_text = alert.text
    if any(warning in alert_text for warning in warning_list):
        warnings.append(alert_text)
        statements.append(None)
    else:
        warnings.append(None)
        statements.append(alert_text)

    # Stamp the row with the scrape time
    months.append(month)
    days.append(day)
    dates.append(date)
    times.append(time)

# Assemble the DataFrame: each accumulated list becomes one column, and the
# i-th entries of all lists together form row i.
column_names = (
    'Province', 'City', 'Warning', 'Statement',
    'Month', 'Day', 'Date', 'Time',
)
column_values = (
    provinces, cities, warnings, statements,
    months, days, dates, times,
)
df = pd.DataFrame(dict(zip(column_names, column_values)))

# Shut down the browser session now that the page has been parsed
driver.quit()

# Show the scraped table on standard output
print(df)

# Persist the table twice: as CSV without the index column, and as JSON with
# one object per row. The relative file names resolve against the current
# working directory.
csv_path = 'weather_data.csv'
json_path = 'weather_data.json'
df.to_csv(csv_path, index=False)
df.to_json(json_path, orient='records')


                 Province                                               City  \
0        British Columbia   East Vancouver Island - Nanoose Bay to Fanny Bay   
1   Northwest Territories                                         Howe Sound   
2             Nova Scotia                            Inland Vancouver Island   
3                 Nunavut  Metro Vancouver - central including the City o...   
4                 Ontario  Metro Vancouver - North Shore including West V...   
5                   Yukon  Metro Vancouver - northeast including Coquitla...   
6        British Columbia                             North Vancouver Island   
7   Northwest Territories             Sunshine Coast - Gibsons to Earls Cove   
8             Nova Scotia       Sunshine Coast - Saltery Bay to Powell River   
9                 Nunavut                              West Vancouver Island   
10                Ontario                                     Aklavik Region   
11                  Yukon  South Delta R