In [35]:
import pandas as pd

# Function to load data from CSV
def load_data(file_path):
    """Load specific columns from a CSV file."""
    return pd.read_csv(file_path, usecols=[1, 2])

# Function to preprocess data types
def preprocess_data(data):
    """Convert columns to appropriate data types."""
    data["valid"] = pd.to_datetime(data["valid"], errors="coerce")
    data["feel"] = pd.to_numeric(data["feel"], errors="coerce")
    return data

# Function to localize and convert time to a specific timezone
def convert_to_timezone(data, time_zone):
    """Convert datetime column to specified timezone and remove timezone information."""
    data["valid"] = data["valid"].dt.tz_localize("UTC").dt.tz_convert(time_zone)
    data["valid"] = data["valid"].dt.tz_localize(None)
    return data

# Function to filter data for daylight hours (6 am - 6 pm)
def filter_daylight_hours(data):
    """Filter data for times between 6 am and 6 pm."""
    return data[(data["valid"].dt.hour >= 6) & (data["valid"].dt.hour < 18)]

# Function to filter for ideal running temperatures
def filter_ideal_temps(data):
    """Filter data for temperatures between 50 and 63.5 degrees."""
    return data[(data["feel"] >= 50) & (data["feel"] <= 63.5)]

# Function to identify complete years in the dataset
def get_complete_years(data):
    """Identify years with data spanning from January to December."""
    years = data["valid"].dt.year.unique()
    complete_years = []
    
    for year in years:
        year_data = data[data["valid"].dt.year == year]
        if (year_data["valid"].dt.month.min() == 1) and (year_data["valid"].dt.month.max() == 12):
            complete_years.append(year)
            
    return complete_years

# Function to filter data to include only complete years
def filter_complete_years(data, complete_years):
    """Filter data to include only entries from complete years."""
    return data[data["valid"].dt.year.isin(complete_years)]

# Function to calculate the average annual ideal running temperature days
def calculate_average_ideal_days_per_year(filtered_data, complete_years):
    """Calculate the average number of ideal temperature days per year."""
    count_days = filtered_data["valid"].dt.date.nunique()
    num_years = len(complete_years)
    return count_days / num_years if num_years > 0 else 0

# Main function to execute the full pipeline
def average_annual_ideal_run_temp_days(file_path, time_zone):
    """Calculate the average annual ideal run temperature days for the dataset."""
    data = load_data(file_path)
    data = preprocess_data(data)
    data = convert_to_timezone(data, time_zone)
    data_daylight = filter_daylight_hours(data)
    filtered_data = filter_ideal_temps(data_daylight)
    complete_years = get_complete_years(filtered_data)
    complete_data = filter_complete_years(filtered_data, complete_years)
    return calculate_average_ideal_days_per_year(complete_data, complete_years)

Average ideal run temperature days for Santa Fe: 267
Average ideal run temperature days for Laramie: 224
Average ideal run temperature days for Flagstaff: 263
Average ideal run temperature days for Colorado Springs: 238
Average ideal run temperature days for Cheyenne: 214
Average ideal run temperature days for Castle Rock: 228


Let's turn this into a function so we can do the same thing for all our other data

In [36]:
# Initialize a dictionary to store the results
results = {}

# Loop through the dataset dictionary and call the function
for file_name, time_zone in dataset.items():
    # Call the function with the file path and time zone
    average_days = average_annual_ideal_run_temp_days(file_name, time_zone)
    city_name = file_name.split("_Hourly_Feel_2014-2024.csv")[0]
    city_name = city_name.replace("_", " ")
    # Store the result in the results dictionary
    results[city_name] = average_days

# Print the results
for city, avg_days in results.items():
    print(f"Average ideal run temperature days for {city}: {avg_days:.0f}") 

Average ideal run temperature days for Santa Fe: 267
Average ideal run temperature days for Laramie: 224
Average ideal run temperature days for Flagstaff: 263
Average ideal run temperature days for Colorado Springs: 238
Average ideal run temperature days for Cheyenne: 214
Average ideal run temperature days for Castle Rock: 228
