# Master Branch Code

## How does the climate impact visitation to the U.S. National Parks?

### Hypothesis: Increasing average annual temperature has a positive correlation with the total number of registered visitors to the U.S. National Parks annually.

#### Team Members: Andy Swellie, Nick Marchetti, Tony "The IT Guy" Elkadi, Kevin Coyne


In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import os
from pprint import pprint
import csv
import gmaps
import datetime

### Get Latitude and Longitude for 20 most visited parks - Anthony
#### Need to put the API key into a config file and replace that in the URL string

In [3]:
top_20_parks = ["Great Smoky Mountains National Park", "Grand Canyon National Park", "Yosemite National Park", 
                "Rocky Mountain National Park", "Zion National Park", "Yellowstone National Park", "Olympic National Park", 
                "Acadia National Park", "Grand Teton National Park", "Glacier National Park", "Joshua Tree National Park", 
               "Cuyahoga Valley National Park", "Bryce Canyon", "Hawaii Volcanoes National Park", "Arches National Park", "Hot Springs National Park",
                "Shannondoah National Park", "Mount Rainier National Park", "Death Valley National Park", "Halekala National Park"]

lats = []
lngs = []
parks = []


for park in top_20_parks:
    url = "https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input=" + park + "&inputtype=textquery&fields=name,geometry&key=AIzaSyAfjVUQRwNEoYjUpjNsyAL5Nc8is2IUykg"
    response = requests.get(url)
    national_park_json = response.json()
    
    lats.append(national_park_json["candidates"][0]["geometry"]["location"]["lat"])
    lngs.append(national_park_json["candidates"][0]["geometry"]["location"]["lng"])
    parks.append(park)

columns = ["Park", "Lat", "Lng"]
National_Parks_df = pd.DataFrame(columns=columns)
National_Parks_df["Park"] = parks
National_Parks_df["Lat"] = lats
National_Parks_df["Lng"] = lngs
National_Parks_df.head(20)

Unnamed: 0,Park,Lat,Lng
0,Great Smoky Mountains National Park,35.611764,-83.489545
1,Grand Canyon National Park,36.106965,-112.112997
2,Yosemite National Park,37.865101,-119.538329
3,Rocky Mountain National Park,40.342793,-105.683639
4,Zion National Park,37.298202,-113.026301
5,Yellowstone National Park,44.427963,-110.588455
6,Olympic National Park,47.802107,-123.604352
7,Acadia National Park,44.338556,-68.273335
8,Grand Teton National Park,43.790428,-110.681763
9,Glacier National Park,48.759613,-113.787023


In [4]:
National_Parks_df.to_csv("Top_20_Parks.csv", index=False, header=True)

In [5]:
# Take top 20 most visited National Parks in the US

top_20_parks = ["Great Smoky Mountains National Park", "Grand Canyon National Park", "Yosemite National Park", 
                "Rocky Mountain National Park", "Zion National Park", "Yellowstone National Park", "Olympic National Park", 
                "Acadia National Park", "Grand Teton National Park", "Glacier National Park", "Joshua Tree National Park", 
               "Cuyahoga Valley National Park", "Bryce Canyon", "Hawaii Volcanoes National Park", "Arches National Park", "Hot Springs National Park",
                "Shannondoah National Park", "Mount Rainier National Park", "Death Valley National Park", "Halekala National Park"]

# Plot Heatmap

fig = gmaps.figure()

# Define locations
locations = National_Parks_df[["Lat", "Lng"]]

# Create heat layer
heat_layer = gmaps.heatmap_layer(locations, 
                                 dissipating=False, max_intensity=10,
                                 point_radius=1)


# Add layer
fig.add_layer(heat_layer)

# Display figure
fig

Figure(layout=FigureLayout(height='420px'))

### Pull in CSV files of visitor data by park and build data frames - Andy

#### Park visit data was pulled down and converted to CSV using a google widget from: "https://irma.nps.gov/Stats/SSRSReports/Park%20Specific%20Reports/Visitation%20by%20Month?Park=SAJU"

In [6]:
park_visits = "Top10_Parks_Visits/Top10_Park_Visits.csv"
park_visits_df = pd.read_csv(park_visits, encoding="utf-8")

park_visits_df.head(3000)

Unnamed: 0,Park,Year,Month,Visits
0,Acadia National Park,2018,1,11974
1,Acadia National Park,2018,2,12167
2,Acadia National Park,2018,3,23756
3,Acadia National Park,2018,4,72858
4,Acadia National Park,2018,5,249050
5,Acadia National Park,2018,6,499163
6,Acadia National Park,2018,7,759165
7,Acadia National Park,2018,8,767219
8,Acadia National Park,2018,9,600397
9,Acadia National Park,2018,10,488204


### Create a list of dates to use in the weather API calls - Nick

In [7]:
base_url = "https://api.darksky.net/forecast/[key]/[latitude],[longitude]"

url = "https://api.darksky.net/forecast/207521500b0c1207a0cd6d1181bf4ce1/40.5,-80.7,2010-06-01T12:00:00"

In [8]:
response = requests.get(url)
json_response = response.json()
print(response)

<Response [200]>


In [9]:
pprint(json_response)

{'currently': {'apparentTemperature': 73.98,
               'cloudCover': 1,
               'dewPoint': 63.01,
               'humidity': 0.69,
               'icon': 'cloudy',
               'precipIntensity': 0,
               'precipProbability': 0,
               'pressure': 1013.64,
               'summary': 'Overcast',
               'temperature': 73.66,
               'time': 1275408000,
               'uvIndex': 4,
               'visibility': 2.55,
               'windBearing': 277,
               'windGust': 6.24,
               'windSpeed': 2.33},
 'daily': {'data': [{'apparentTemperatureHigh': 79.27,
                     'apparentTemperatureHighTime': 1275429600,
                     'apparentTemperatureLow': 57.65,
                     'apparentTemperatureLowTime': 1275472800,
                     'apparentTemperatureMax': 79.27,
                     'apparentTemperatureMaxTime': 1275429600,
                     'apparentTemperatureMin': 64.18,
                     'appar

In [10]:
dates = pd.date_range(start='2008-01-01', end='2018-12-31', freq='2d')

print(dates)

DatetimeIndex(['2008-01-01', '2008-01-03', '2008-01-05', '2008-01-07',
               '2008-01-09', '2008-01-11', '2008-01-13', '2008-01-15',
               '2008-01-17', '2008-01-19',
               ...
               '2018-12-12', '2018-12-14', '2018-12-16', '2018-12-18',
               '2018-12-20', '2018-12-22', '2018-12-24', '2018-12-26',
               '2018-12-28', '2018-12-30'],
              dtype='datetime64[ns]', length=2009, freq='2D')


In [11]:
url_dates = []
reg_dates = []
months = []
years = []

for x in dates:
    date_parts = datetime.datetime.strptime((str(x).split(' ', 1)[0]), "%Y-%m-%d")
    # month number
    months.append(date_parts.month)
    years.append(date_parts.year)
    reg_dates.append((str(x).split(' ', 1)[0]))
    url_dates.append((str(x).split(' ', 1)[0]) + "T12:00:00")

In [12]:
date_df = pd.DataFrame({"date" : reg_dates,
                        "url_date" : url_dates,
                        "year" : years,
                        "month" : months})

date_df.head()

Unnamed: 0,date,url_date,year,month
0,2008-01-01,2008-01-01T12:00:00,2008,1
1,2008-01-03,2008-01-03T12:00:00,2008,1
2,2008-01-05,2008-01-05T12:00:00,2008,1
3,2008-01-07,2008-01-07T12:00:00,2008,1
4,2008-01-09,2008-01-09T12:00:00,2008,1


### Weather API calls & Data Frames - Kevin & Nick

In [13]:
base_url = "https://api.darksky.net/forecast/[key]/[latitude],[longitude]"
test_url = "https://api.darksky.net/forecast/207521500b0c1207a0cd6d1181bf4ce1/35.611764,-83.489545,2009-02-03T12:00:00"

file = "CSVs/Top_10_Parks.csv"

park_df = pd.read_csv(file)

park_df.head()

Unnamed: 0,Park,Lat,Lng
0,Great Smoky Mountains National Park,35.611764,-83.489545
1,Grand Canyon National Park,36.106965,-112.112997
2,Yosemite National Park,37.865101,-119.538329
3,Rocky Mountain National Park,40.342793,-105.683639
4,Zion National Park,37.298202,-113.0263


In [14]:
response = requests.get(test_url)
json_response = response.json()
print(response)

<Response [200]>


In [15]:
pprint(json_response)

{'currently': {'apparentTemperature': 24.76,
               'cloudCover': 1,
               'dewPoint': 17.16,
               'humidity': 0.72,
               'icon': 'cloudy',
               'precipIntensity': 0.0008,
               'precipProbability': 0.22,
               'precipType': 'snow',
               'summary': 'Overcast',
               'temperature': 24.76,
               'time': 1233680400,
               'uvIndex': 3,
               'visibility': 10,
               'windBearing': 250,
               'windGust': 10.62,
               'windSpeed': 2.34},
 'daily': {'data': [{'apparentTemperatureHigh': 24.76,
                     'apparentTemperatureHighTime': 1233680400,
                     'apparentTemperatureLow': -6.2,
                     'apparentTemperatureLowTime': 1233745200,
                     'apparentTemperatureMax': 24.76,
                     'apparentTemperatureMaxTime': 1233680400,
                     'apparentTemperatureMin': -1.54,
                    

In [16]:
# Create range of dates to use in loop when going through API

dates = pd.date_range(start='2009-01-01', end='2018-12-31', freq='3d')

print(dates)

DatetimeIndex(['2009-01-01', '2009-01-04', '2009-01-07', '2009-01-10',
               '2009-01-13', '2009-01-16', '2009-01-19', '2009-01-22',
               '2009-01-25', '2009-01-28',
               ...
               '2018-12-04', '2018-12-07', '2018-12-10', '2018-12-13',
               '2018-12-16', '2018-12-19', '2018-12-22', '2018-12-25',
               '2018-12-28', '2018-12-31'],
              dtype='datetime64[ns]', length=1218, freq='3D')


In [17]:
# Create list of dates in the format needed for the API calls

url_dates = []
reg_dates = []
months = []
years = []

for x in dates:
    date_parts = datetime.datetime.strptime((str(x).split(' ', 1)[0]), "%Y-%m-%d")
    # month number
    months.append(date_parts.month)
    years.append(date_parts.year)
    reg_dates.append((str(x).split(' ', 1)[0]))
    url_dates.append((str(x).split(' ', 1)[0]) + "T12:00:00")
    
print(url_dates)

['2009-01-01T12:00:00', '2009-01-04T12:00:00', '2009-01-07T12:00:00', '2009-01-10T12:00:00', '2009-01-13T12:00:00', '2009-01-16T12:00:00', '2009-01-19T12:00:00', '2009-01-22T12:00:00', '2009-01-25T12:00:00', '2009-01-28T12:00:00', '2009-01-31T12:00:00', '2009-02-03T12:00:00', '2009-02-06T12:00:00', '2009-02-09T12:00:00', '2009-02-12T12:00:00', '2009-02-15T12:00:00', '2009-02-18T12:00:00', '2009-02-21T12:00:00', '2009-02-24T12:00:00', '2009-02-27T12:00:00', '2009-03-02T12:00:00', '2009-03-05T12:00:00', '2009-03-08T12:00:00', '2009-03-11T12:00:00', '2009-03-14T12:00:00', '2009-03-17T12:00:00', '2009-03-20T12:00:00', '2009-03-23T12:00:00', '2009-03-26T12:00:00', '2009-03-29T12:00:00', '2009-04-01T12:00:00', '2009-04-04T12:00:00', '2009-04-07T12:00:00', '2009-04-10T12:00:00', '2009-04-13T12:00:00', '2009-04-16T12:00:00', '2009-04-19T12:00:00', '2009-04-22T12:00:00', '2009-04-25T12:00:00', '2009-04-28T12:00:00', '2009-05-01T12:00:00', '2009-05-04T12:00:00', '2009-05-07T12:00:00', '2009-05-1

In [18]:
# Make dataframe of dates, api dates, years, and months
date_df = pd.DataFrame({"date" : reg_dates,
                        "url_date" : url_dates,
                        "year" : years,
                        "month" : months})

date_df.head()

Unnamed: 0,date,url_date,year,month
0,2009-01-01,2009-01-01T12:00:00,2009,1
1,2009-01-04,2009-01-04T12:00:00,2009,1
2,2009-01-07,2009-01-07T12:00:00,2009,1
3,2009-01-10,2009-01-10T12:00:00,2009,1
4,2009-01-13,2009-01-13T12:00:00,2009,1


In [18]:
base_url = "https://api.darksky.net/forecast/207521500b0c1207a0cd6d1181bf4ce1/" 

parks = park_df["Park"]
lat = park_df["Lat"]
lng = park_df["Lng"]



# Empty lists for new dataframe
df_dates = []
df_months = []
df_years = []
df_parks = []
temps = []

for x in range(len(parks)):
    coord = str(lat[x]) + "," + str(lng[x])
    
    print("Currently gathering temperatures for " + parks[x])
    
    for z in range(len(url_dates)):
        try:
            weather_url = base_url + coord + "," + url_dates[z]
            response = requests.get(weather_url).json()
            temp = response["daily"]["data"][0]["temperatureHigh"]
            temps.append(temp)
            df_dates.append(reg_dates[z])
            df_months.append(months[z])
            df_years.append(years[z])
            df_parks.append(parks[x])
#           print(parks[x] + " temperature (" + reg_dates[z] + "): " + str(temp))
        except KeyError:
            print("Record not found, skipping.")

Currently gathering temperatures for Great Smoky Mountains National Park
Currently gathering temperatures for Grand Canyon National Park
Record not found, skipping.
Currently gathering temperatures for Yosemite National Park
Record not found, skipping.
Currently gathering temperatures for Rocky Mountain National Park
Record not found, skipping.
Currently gathering temperatures for Zion National Park
Record not found, skipping.
Currently gathering temperatures for Yellowstone National Park
Record not found, skipping.
Currently gathering temperatures for Olympic National Park
Record not found, skipping.
Currently gathering temperatures for Acadia National Park
Currently gathering temperatures for Grand Teton National Park
Record not found, skipping.
Currently gathering temperatures for Glacier National Park
Record not found, skipping.


In [19]:
final_df = pd.DataFrame({"Park" : df_parks,
                        "Date" : df_dates,
                        "Year" : df_years,
                        "Month" : df_months,
                        "Temp" : temps})


final_df.head()

NameError: name 'df_parks' is not defined

In [20]:
final_df.to_csv("final_df.csv", index = False, header = True)

NameError: name 'final_df' is not defined

In [21]:
avg_by_month_df = final_df.groupby(["Park", "Year", "Month"]).mean()
avg_by_month_df = avg_by_month_df.reset_index()
avg_by_month_df

#avg_by_month_df.to_csv("avg_by_month.csv", index = False, header = True)

NameError: name 'final_df' is not defined

In [22]:
avg_by_year_df = final_df.groupby(["Park", "Year"]).mean()
del avg_by_year_df["Month"]
avg_by_year_df = avg_by_year_df.reset_index()
avg_by_year_df

avg_by_year_df.to_csv("avg_by_year.csv", index = False, header = True)

NameError: name 'final_df' is not defined

In [24]:
avg_by_year_all_df = final_df.groupby(["Year"]).mean()
del avg_by_year_all_df["Month"]
avg_by_year_all_df = avg_by_year_all_df.reset_index()
avg_by_year_all_df

avg_by_year_all_df.to_csv("avg_by_year_all_df.csv", index = False, header = True)

In [25]:
avg_by_year_month_all_df = final_df.groupby(["Year","Month"]).mean()
avg_by_year_month_all_df = avg_by_year_month_all_df.reset_index()
avg_by_year_month_all_df

avg_by_year_month_all_df.to_csv("avg_by_year_month_all_df.csv", index = False, header = True)

### Final Data Frame for Weather: including lat/long

In [26]:

file = "avg_by_month.csv"
new_df = pd.read_csv(file)
new_df

Unnamed: 0,Park,Year,Month,Temp
0,Acadia National Park,2009,1,21.501818
1,Acadia National Park,2009,2,28.770000
2,Acadia National Park,2009,3,32.956000
3,Acadia National Park,2009,4,45.278000
4,Acadia National Park,2009,5,54.897273
5,Acadia National Park,2009,6,59.150000
6,Acadia National Park,2009,7,62.654000
7,Acadia National Park,2009,8,68.203000
8,Acadia National Park,2009,9,59.403000
9,Acadia National Park,2009,10,48.867273


In [27]:
new_df["Vistors"] = ""
new_df

Unnamed: 0,Park,Year,Month,Temp,Vistors
0,Acadia National Park,2009,1,21.501818,
1,Acadia National Park,2009,2,28.770000,
2,Acadia National Park,2009,3,32.956000,
3,Acadia National Park,2009,4,45.278000,
4,Acadia National Park,2009,5,54.897273,
5,Acadia National Park,2009,6,59.150000,
6,Acadia National Park,2009,7,62.654000,
7,Acadia National Park,2009,8,68.203000,
8,Acadia National Park,2009,9,59.403000,
9,Acadia National Park,2009,10,48.867273,


In [2]:

file = "national_parks.csv"
coords_df = pd.read_csv(file)

coords_df.head()

merged_df = pd.merge(new_df, coords_df, how="inner")

merged_df

NameError: name 'new_df' is not defined

In [30]:
merged_df.to_csv("with_coords.csv", index = False, header = True)

NameError: name 'merged_df' is not defined