# Libs

In [31]:
import pytz
import aiohttp
import asyncio
import csv
from datetime import datetime, date, timedelta
from bs4 import BeautifulSoup

# Current calendar date according to the system's local clock; used further
# down as the default end of the collection range.
today = date.today()

# Parameters

In [35]:
# First and last calendar day to collect.
start_date = date(2023, 7, 26) # date(yyyy, m, d), or `today`
end_date = today # or an explicit date(yyyy, m, d), e.g. date(2023, 7, 26)
# Time zone attached to the generated dates and to the parsed timestamps.
timezone = pytz.timezone('America/Los_Angeles') # any name listed in pytz.all_timezones
# Desired temperature unit for the collected readings.
temp_unit = 'F' # 'C' for Celsius, 'F' for Fahrenheit


# url constructor

In [29]:
def collect_dates(start_date, end_date, timezone):
    """Describe every calendar day from ``start_date`` to ``end_date`` inclusive.

    Args:
        start_date: First day of the range.
        end_date: Last day of the range (included in the result).
        timezone: Object with a pytz-style ``localize(naive_datetime)`` method;
            each day's local midnight is localized with it.

    Returns:
        A list with one dict per day, in chronological order, holding the keys
        'year', 'month', 'month_name', 'day', 'day_of_week' and 'iso_8601'
        (local midnight of that day as an ISO 8601 string). The list is empty
        when ``end_date`` is earlier than ``start_date``.
    """
    midnight = datetime.min.time()

    # Count days on the naive calendar dates. Subtracting two timezone-aware
    # midnights is off by an hour across a DST change, which made the day
    # count one too small whenever the range crossed a spring-forward date.
    day_count = (end_date - start_date).days + 1

    date_details_list = []
    for offset in range(day_count):
        # Localize each day on its own so the UTC offset in 'iso_8601' is the
        # one in force on that day rather than the one of the first day.
        naive_midnight = datetime.combine(start_date + timedelta(days=offset), midnight)
        day = timezone.localize(naive_midnight)
        date_details_list.append({
            'year': day.year,
            'month': day.month,
            'month_name': day.strftime('%B'),
            'day': day.day,
            'day_of_week': day.strftime('%A'),
            'iso_8601': day.isoformat(),
        })
    return date_details_list

# Describe every day in the configured range.
date_details_list = collect_dates(start_date, end_date, timezone)

# Each daily history page lives under this prefix, followed by
# "<year>/<month>/<day>/" and a descriptive slug naming the weekday, date and station.
url = 'https://weatherspark.com/h/d/145255/'
urls = [
    url + f"{day['year']}/{day['month']}/{day['day']}/Historical-Weather-on-{day['day_of_week']}-{day['month_name']}-{day['day']}-{day['year']}-at-Hollister-Municipal-Airport-California-United-States"
    for day in date_details_list
]

# fetching urls

In [24]:

async def fetch(session, url):
    """Request ``url`` through ``session`` and return the response body as text."""
    async with session.get(url) as resp:
        body = await resp.text()
    return body

async def fetch_all(urls):
    """Download every URL in ``urls`` over one shared client session.

    All requests are scheduled together and awaited as a group; the returned
    list holds the response texts in the same order as ``urls``.
    """
    async with aiohttp.ClientSession() as session:
        pending = [fetch(session, target) for target in urls]
        return await asyncio.gather(*pending)

# Top-level `await`: presumably relies on this being an IPython/Jupyter cell
# (see the `In [..]` markers); a plain script would have to run the coroutine
# through an event loop instead.
html_list = await fetch_all(urls)

# Parse HTMLs

In [26]:
def parse_html(html, date_details, timezone):
    """Extract timestamped temperature and wind readings from one day's page.

    Reads the module-level ``temp_unit`` setting ('C' or 'F') to decide
    whether temperatures need converting.

    Args:
        html: HTML text of a single daily page.
        date_details: Dict describing the day the page covers; only its
            'iso_8601' entry is read, to obtain the calendar date.
        timezone: Object with a pytz-style ``localize(naive_datetime)`` method,
            used to make each reading's timestamp timezone-aware.

    Returns:
        A list of dicts with the keys 'timestamp', 'temp', 'temp_units',
        'wind' and 'wind_units', one per element that has enough cells.

    Raises:
        ValueError: If a row's time is not in '%I:%M %p' format, or its
            temperature cell does not contain exactly one '°'.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # The calendar date is the same for every row of the page, so parse it once.
    page_date = datetime.fromisoformat(date_details['iso_8601']).date()

    data_dicts = []

    # Readings are taken from the elements whose id starts with 'metar'.
    for element in soup.select('[id^=metar]'):
        tds = element.find_all('td')

        # Cells 0, 1 and 3 are read below, so at least four must be present
        # (the previous ">= 3" check let tds[3] raise IndexError).
        if len(tds) < 4:
            continue

        time_str = tds[0].text.strip()
        temp_str = tds[1].text.strip()
        wind_str = tds[3].text.strip()

        # Combine the page's date with the row's 12-hour clock time and attach
        # the time zone before serialising to ISO 8601.
        reading_time = datetime.strptime(time_str, '%I:%M %p').time()
        aware_dt = timezone.localize(datetime.combine(page_date, reading_time))
        timestamp = aware_dt.isoformat()

        # Separate the numeric part from the unit, e.g. "72°F" -> "72", "°F".
        temp, unit_suffix = temp_str.split('°')
        temp_units = '°' + unit_suffix
        source_unit = unit_suffix.strip().upper()

        # Convert only when the page's unit differs from the requested one;
        # converting unconditionally would mangle values already in that unit.
        if temp_unit == 'C' and source_unit == 'F':
            temp = round((float(temp) - 32) * 5 / 9)
            temp_units = '°C'
        elif temp_unit == 'F' and source_unit == 'C':
            temp = round(float(temp) * 9 / 5 + 32)
            temp_units = '°F'

        # partition() tolerates a wind cell without a space (units become '')
        # where unpacking split(' ', 1) would raise.
        wind, _, wind_units = wind_str.partition(' ')

        data_dicts.append({
            'timestamp': timestamp,
            'temp': temp,
            'temp_units': temp_units,
            'wind': wind,
            'wind_units': wind_units,
        })

    return data_dicts

# Pair each downloaded page with the day it was requested for, parse it, and
# gather the rows of all days into one flat list.
all_data = []
for page_html, page_date in zip(html_list, date_details_list):
    all_data += parse_html(page_html, page_date, timezone)

# Exporting to CSV

In [36]:
# Name the output file after the current local date and time,
# e.g. "20230726_1530.csv".
now = datetime.now()
now_str = now.strftime("%Y%m%d_%H%M")
csv_file_name = f"{now_str}.csv"

# Column order of the CSV; matches the keys of the dicts in all_data.
fieldnames = ['timestamp', 'temp', 'temp_units', 'wind', 'wind_units']

# The encoding is set explicitly because the temperature units contain the
# non-ASCII '°'; relying on the platform default makes the output
# machine-dependent and can fail on locales that cannot encode it.
# newline='' lets the csv module control line endings itself.
with open(csv_file_name, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Header row first, then one row per collected reading.
    writer.writeheader()
    writer.writerows(all_data)