### Imports

In [105]:
import json
import os
from datetime import datetime

import numpy as np
import pandas as pd
import requests
from sklearn.metrics.pairwise import haversine_distances

# Import Meteostat library
from meteostat import Point, Daily

### Get List of Every National Park in the US
- If needed, figure out zipcode

In [2]:
def get_nps_park_info(api_key=None):
    """Fetch park records from the National Park Service API.

    Parameters
    ----------
    api_key : str, optional
        NPS API key. When omitted, the key is read from the ``NPS_API_KEY``
        environment variable so the credential is never stored in the notebook.

    Returns
    -------
    list or None
        The ``data`` field of the JSON response when the request returns
        HTTP 200. For any other status the status code is printed and
        ``None`` is returned.

    Raises
    ------
    RuntimeError
        If no key is passed and ``NPS_API_KEY`` is not set.
    """
    # API endpoint for the National Park Service API
    API_ENDPOINT = 'https://developer.nps.gov/api/v1/parks'

    # Credentials come from the caller or the environment, never from source.
    # NOTE(review): a key was previously committed in this cell; it should be
    # treated as exposed and rotated.
    if api_key is None:
        api_key = os.environ.get('NPS_API_KEY')
    if not api_key:
        raise RuntimeError(
            'No NPS API key available: pass api_key=... or set the NPS_API_KEY environment variable'
        )

    # Request parameters
    params = {
        'api_key': api_key,
        'limit': 500,  # Maximum number of parks per request
        'countryCode': 'US',  # Limit results to US national parks
    }

    # A timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(API_ENDPOINT, params=params, timeout=30)

    # Check the status BEFORE decoding: an error response is not guaranteed to
    # carry a JSON body, and decoding it first would raise instead of
    # reaching this branch.
    if response.status_code != 200:
        print(response.status_code)
        return None

    # Retrieve the list of parks from the response
    return response.json()['data']

In [3]:
# Fetch the park records once; the result is None if the request did not return HTTP 200
parks = get_nps_park_info()

In [8]:
def filter_nps_park_info(parks):
    """Reduce raw park records to ``[fullName, latitude, longitude, states]`` rows.

    Records whose ``addresses`` list is empty are left out; every other record
    contributes exactly one row, in the original order.
    """
    return [
        [record['fullName'], record['latitude'], record['longitude'], record['states']]
        for record in parks
        # A park with no address entries at all is not kept
        if len(record['addresses']) != 0
    ]

In [113]:
def format_and_clean_nps_park_info(park_info):
    """Turn park rows into a typed DataFrame with incomplete rows removed.

    ``park_info`` is a list of ``[park, lat, lon, states]`` rows. Empty strings
    are treated as missing values, any row with a missing value is dropped, and
    the remaining columns are cast to str / float / float / str.
    """
    column_dtypes = {'park': 'str', 'lat': 'float', 'lon': 'float', 'states': 'str'}

    raw_df = pd.DataFrame(park_info, columns=list(column_dtypes))

    # '' -> NaN first, so that dropna() also removes rows with blank fields
    without_blanks = raw_df.replace('', float('nan')).dropna()

    return without_blanks.astype(column_dtypes)

In [114]:
# Build the cleaned, typed parks table (columns: park, lat, lon, states)
nps_parks_df = format_and_clean_nps_park_info(filter_nps_park_info(parks))

# Leftover exploration of postal-code formats (postal codes are no longer collected):
# df['postal_codes'].str.replace(r'[0-9]', '.', regex=True).unique()

### Add Distance From Home

In [133]:
def latitude_longitude_distance(start_location, end_location, earths_radius_in_miles=6371000/1609.344):
    """Pairwise great-circle distance between two sets of coordinates.

    Parameters
    ----------
    start_location, end_location : array-like with two columns
        Rows of ``[latitude, longitude]`` in degrees.
    earths_radius_in_miles : float
        Radius used to scale the haversine result. The default is the mean
        Earth radius of 6,371,000 m divided by 1,609.344 m per mile
        (about 3,958.8 miles). The previous default divided metres by
        kilometres-per-mile, which made every distance 1000x too large.

    Returns
    -------
    2-D array with one row per start location and one column per end
    location, in the unit implied by the radius (miles by default).
    """
    # Turn latitude/longitude into radians
    start_location = np.deg2rad(start_location)
    end_location = np.deg2rad(end_location)

    # Get the distance between the start location(s) and end location(s) in miles
    return haversine_distances(start_location, end_location) * earths_radius_in_miles

# Reference ("home") point as a single [latitude, longitude] row, in degrees
current_loc = [[33.822460, -84.328970]]
# Adds a 'dist' column to nps_parks_df in place: each park's distance to current_loc
nps_parks_df['dist'] = latitude_longitude_distance(nps_parks_df[['lat', 'lon']], current_loc)

### Get Monthly Avg Weather for Given Locations
#### Sources that didn't work (due to cost or ambiguities)
- OpenWeatherMap (paid)
- Tomorrow.io (paid)
- NCEI (zipcode ambiguity) https://www.ncei.noaa.gov/cdo-web/search?datasetid=NORMAL_DLY
- meteostat (doesn't have weather for most "National Parks")

In [208]:
def get_daily_weather(lat, lon, start_year, start_month, start_day, end_year, end_month, end_day):
    """Fetch Meteostat daily weather for one coordinate over a date range.

    The start and end dates are given as separate year / month / day integers.
    The result may be an empty DataFrame, which is returned exactly as
    fetched. A non-empty result has its index moved into a regular column
    before being returned.
    """
    # Create Point for the requested coordinate
    location = Point(lat, lon)

    # Set time period
    period_start = datetime(start_year, start_month, start_day)
    period_end = datetime(end_year, end_month, end_day)

    # Get daily data for location
    daily = Daily(location, period_start, period_end).fetch()

    # Nothing to reshape when no rows came back
    if daily.empty:
        return daily

    return daily.reset_index()

In [135]:
def get_daily_weather_for_park(park_info, year, month, start_day, end_day):
    """Fetch one park's daily weather for a day range within a single month and year.

    ``park_info`` is indexed by 'park', 'lat', 'lon' and 'dist'. Those values
    are written onto the returned frame as columns, so rows from different
    parks can still be told apart after the frames are stacked.
    """
    weather = get_daily_weather(
        park_info['lat'], park_info['lon'],
        year, month, start_day,
        year, month, end_day,
    )

    # Tag the rows with the park they belong to
    for column in ('park', 'lat', 'lon', 'dist'):
        weather[column] = park_info[column]

    return weather

In [138]:
def celsius_to_farenheit(celsius_temp):
    """Convert a Celsius temperature to Fahrenheit.

    Uses only ``*``, ``/`` and ``+``, so it works on any value that supports
    those operators.
    """
    scaled = celsius_temp * 9 / 5
    return scaled + 32

In [136]:
# Month and day range queried in every year
month = 8
start_day = 9
end_day = 20

# Get weather data for the set days for every year from 2010 through 2022
# (range's end is exclusive, so this is 13 years; one fetch per park per year)
unformatted_weather_data = []
for year in range(2010, 2023):
    list_of_weather_dfs = nps_parks_df.apply(lambda park_info: get_daily_weather_for_park(park_info, year, month, start_day, end_day), axis=1).to_list()
    unformatted_weather_data.extend(list_of_weather_dfs)

# Stack the per-park, per-year frames into one long frame
weather_data = pd.concat(unformatted_weather_data)

# Turn temperature data into Fahrenheit
temp_cols = ['tavg', 'tmin', 'tmax']
weather_data[temp_cols] = celsius_to_farenheit(weather_data[temp_cols].astype(float))

In [154]:
# Average every column per park and calendar day (month, day), collapsing the fetched years
avg_weather = weather_data.groupby(['park', weather_data['time'].dt.month, weather_data['time'].dt.day]).mean()

In [187]:
# Index level 0 is the park name (first groupby key); keep names containing 'National Park'
is_national_park = avg_weather.index.get_level_values(0).str.contains('National Park')
# tmax was converted to Fahrenheit when weather_data was built, so the 76 threshold is in °F
is_cool_temp = avg_weather['tmax'] <= 76

In [188]:
# Unique park names passing both masks, in order of first appearance after sorting by ascending 'dist'
avg_weather[is_national_park & is_cool_temp].sort_values('dist').index.get_level_values(0).unique() #nps_parks_df

Index(['Isle Royale National Park', 'Acadia National Park',
       'Wind Cave National Park', 'Rocky Mountain National Park',
       'Bryce Canyon National Park', 'Yellowstone National Park'],
      dtype='object', name='park')

#### See what data we have/don't have. Are we missing data based on:
- East vs west, north vs south?

In [55]:
# Park names come from the 'park' column: nps_parks_df has no 'name' column,
# so attribute access via `.name` fails on a fresh kernel.
nps_parks = nps_parks_df['park'].unique()
# Parks that contributed at least one row of weather data
parks_with_data = weather_data['park'].unique()

# Parks for which no weather rows were returned, kept in nps_parks order
parks_without_data = [park for park in nps_parks if park not in parks_with_data]

----

### Realized most parks that are 'National Parks' don't have weather data via this API

In [206]:
# NOTE(review): this passes the whole nps_parks_df, whereas the yearly fetch loop passes one
# row at a time via apply(axis=1) — confirm whether a single park row was intended here
d = get_daily_weather_for_park(nps_parks_df, 2001, month, start_day, end_day)

In [209]:
# Spot check of a single coordinate for 10-20 Aug 2020; the recorded output below appears to have no rows
get_daily_weather(37.8488, -119.5571, 2020, 8, 10, 2020, 8, 20)

Unnamed: 0,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun
