In [10]:
import pandas as pd

# Load the hourly Santa Fe observations from the CSV export
file_path = r'C:\Users\evane\OneDrive\Desktop\Personal Research\Santa_Fe_Hourly_Feel_2014-2024.csv'
data = pd.read_csv(file_path)

# Give both columns proper dtypes; 'feel' entries that are not numbers become NaN
data['feel'] = pd.to_numeric(data['feel'], errors='coerce')
data['valid'] = pd.to_datetime(data['valid'])

# Calendar date of every observation (time of day dropped)
data['date'] = data['valid'].dt.date

# Keep the rows whose 'feel' value lies in the inclusive 50-55 range
# (NaN readings never match)
in_range = data['feel'].between(50, 55)
filtered_data = data[in_range]

# Distinct dates that have at least one in-range observation
count_days = filtered_data['date'].nunique()

# Every calendar year from the first to the last observation counts as one year
observation_years = data['valid'].dt.year
start_year = observation_years.min()
end_year = observation_years.max()
num_years = end_year - start_year + 1

# Qualifying days averaged over those years
average_days_per_year = count_days / num_years

# Report the result, rounded to a whole number of days
print(f'Average number of days per year with at least one measurement between 50 and 55 degrees: {average_days_per_year:.0f}')

Average number of days per year with at least one measurement between 50 and 55 degrees: 192


Let's turn this into a function so we can run the same calculation on the data for all the other cities.

In [13]:
import pandas as pd

# All city CSVs live in the same folder, so spell the folder out once
# and list only the file names.
data_dir = r'C:\Users\evane\OneDrive\Desktop\Personal Research\average_annual_ideal_running_temp_days'
city_files = [
    'Santa_Fe_Hourly_Feel_2014-2024.csv',
    'Laramie_Hourly_Feel_2014-2024.csv',
    'Cheyenne_Hourly_Feel_2014-2024.csv',
    'Colorado_Springs_Hourly_Feel_2014-2024.csv',
    'Castle_Rock_Hourly_Feel_2014-2024.csv',
    'Flagstaff_Hourly_Feel_2014-2024.csv',
]

# Full Windows-style path for every city file, in the order listed above
file_paths = [data_dir + '\\' + csv_name for csv_name in city_files]

# Same process as above, just turned into a function
def average_annual_ideal_run_temp_days(file_path, low=50, high=55):
    """Average number of days per year with at least one 'feel' reading in range.

    Reads a CSV that has a 'valid' column (timestamps) and a 'feel' column,
    counts the distinct calendar dates on which at least one 'feel' value
    falls inside the inclusive range [low, high], and divides that count by
    the number of calendar years the timestamps span.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.
    low, high : int or float, optional
        Inclusive lower and upper bounds for 'feel'. The defaults (50 and 55)
        reproduce the range used in the original analysis.

    Returns
    -------
    float
        Qualifying days divided by the number of calendar years. NaN when the
        file contains no timestamps at all.

    Raises
    ------
    KeyError
        If the CSV lacks a 'valid' or 'feel' column.
    """
    data = pd.read_csv(file_path)

    # Entries in 'feel' that are not numbers become NaN and can never be in range.
    feel = pd.to_numeric(data['feel'], errors='coerce')
    valid = pd.to_datetime(data['valid'])

    # Distinct calendar days with at least one in-range reading. normalize()
    # sets the time of day to midnight, so equal dates compare equal.
    in_range = feel.between(low, high)
    count_days = valid[in_range].dt.normalize().nunique()

    # NOTE(review): every calendar year from the first to the last observation
    # is counted as a whole year, so a partially covered first or last year
    # lowers the average - confirm the files cover complete years.
    years = valid.dt.year
    num_years = years.max() - years.min() + 1

    return count_days / num_years

# Store the results in a dictionary keyed by city name
results = {}

# Loop through the file paths and call the function
for file_path in file_paths:
    # File names look like "<City_Name>_Hourly_Feel_<years>.csv". Taking only the
    # first "_"-separated token would cut multi-word cities short ("Santa_Fe" ->
    # "Santa") and let two cities that share a first word overwrite each other,
    # so keep everything before "_Hourly" and turn the underscores into spaces.
    file_name = file_path.replace('\\', '/').rsplit('/', 1)[-1]
    file_stem = file_name.rsplit('.', 1)[0]
    city_name = file_stem.split('_Hourly', 1)[0].replace('_', ' ')
    results[city_name] = average_annual_ideal_run_temp_days(file_path)

# Print the results
for city, avg_days in results.items():
    print(f"Average ideal run temperature days for {city}: {avg_days:.0f}")

Average ideal run temperature days for Santa: 192
Average ideal run temperature days for Laramie: 186
Average ideal run temperature days for Cheyenne: 179
Average ideal run temperature days for Colorado: 182
Average ideal run temperature days for Castle: 178
Average ideal run temperature days for Flagstaff: 217
