# State Park Data Collection

In [1]:
import os
import requests
import json
import pandas as pd
import numpy as np
from config import NPS_key
import pprint as pp

In [2]:
# Import SQLAlchemy `automap` and other dependencies here
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, func, MetaData,Table, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

In [3]:
#from config import pg_password

Get Parks data

In [4]:
parks_url = "https://developer.nps.gov/api/v1/parks?limit=600&api_key="

response = requests.get(parks_url + NPS_key)     
data = response.json()
parkData = data['data']


In [5]:
#pp.pprint(parkData[600])
len(parkData)

468

In [6]:
parks_info =[]
park_activity_info=[]
for park in parkData:
    if (isinstance(park['images'], list) and len(park['images'])> 0):
        park_info = {
            "name": park['name'],
            "lat": park['latitude'],
            "long": park['longitude'],
            "parkCode": park['parkCode'],
            "image_url": park['images'][0]['url'],
            "image_title":park['images'][0]['title'],
            "image_credit":park['images'][0]['credit']
            }
        print(park['name'])
       
    else:
        park_info = {
            "name": park['name'],
            "lat": park['latitude'],
            "long": park['longitude'],
            "parkCode": park['parkCode'],
            "image_url": "",
            "image_title":"",
            "image_credit":""
            }    
    parks_info.append(park_info) 
    park_activities = park['activities']
    for activity in park_activities:
        park_activity ={
            "parkCode":park['parkCode'],
            "activityId":activity["id"]
            }
        park_activity_info.append(park_activity)
#pp.pprint(parks_info)
#pp.pprint(park_activity_info)


Abraham Lincoln Birthplace
Acadia
Adams
African American Civil War Memorial
African Burial Ground
Agate Fossil Beds
Ala Kahakai
Alagnak
Alaska Public Lands
Alcatraz Island
Aleutian Islands World War II
Alibates Flint Quarries
Allegheny Portage Railroad
American Memorial
Amistad
Anacostia
Andersonville
Andrew Johnson
Aniakchak
Antietam
Apostle Islands
Appalachian
Appomattox Court House
Arches
Arkansas Post
Arlington House, The Robert E. Lee Memorial
Assateague Island
Aztec Ruins
Badlands
Baltimore-Washington
Bandelier
Belmont-Paul Women's Equality
Bent's Old Fort
Bering Land Bridge
Big Bend
Big Cypress
Big Hole
Big South Fork
Big Thicket
Bighorn Canyon
Birmingham Civil Rights
Biscayne
Black Canyon Of The Gunnison
Blackstone River Valley
Blue Ridge
Bluestone
Booker T Washington
Boston African American
Boston Harbor Islands
Boston
Brices Cross Roads
Brown v. Board of Education
Bryce Canyon
Buck Island Reef
Buffalo
Cabrillo
California
Camp Nelson
Canaveral
Cane River Creole
Canyon de Chell

Get activities 

In [7]:
activities_url = "https://developer.nps.gov/api/v1/activities?api_key="

In [8]:
response = requests.get(activities_url + NPS_key)     
data = response.json()
activitiesData = data['data']
#pp.pprint(activitiesData)

In [9]:
activities_info =[]
for activity in activitiesData:
    act_info = {
        "id": activity['id'],
        "name": activity['name']
        }
    activities_info.append(act_info) 
#activities_info

In [10]:
#use sqlalchemy to create tables and insert all this data

In [11]:

# Create an engine for the database
#engine = create_engine(f'postgresql://postgres:{pg_password}@localhost:5432/NPS')
engine = create_engine("sqlite:///NPS.sqlite")
inspector = inspect(engine)

meta = MetaData()

In [12]:
class Park(Base):
    __tablename__ = 'park'
    parkCode = Column(String(255), primary_key=True)
    name = Column(String(255))
    lat = Column(String(255))
    long = Column(String(255))
    image_url = Column(String(255))
    image_title = Column(String(255))
    image_credit = Column(String(255))   

class Activity(Base):
    __tablename__ = 'activity'
    id = Column(String(255), primary_key=True)
    name = Column(String(255))

class ParkActivities(Base):
    __tablename__ = 'parkActivities'
    id = Column(String(255), primary_key=True)
    parkCode = Column(String(255), primary_key=True)
    
class ParkStats(Base):
    __tablename__ = 'parkStats'
    id = Column(Integer, primary_key=True)
    parkCode = Column(String(255))
    visitors = Column(Integer)
    year = Column(Integer)


In [13]:
 # Create Database Connection
# ----------------------------------

conn = engine.connect()
#clear out the database
Base.metadata.drop_all(engine)

In [14]:
Base.metadata.create_all(engine)

In [15]:
from sqlalchemy.orm import Session
session = Session(bind=engine)

In [16]:
for np in parks_info:
    park = Park(name=np['name'], parkCode =np['parkCode'], lat =np['lat'], long = np['long'],image_url =np['image_url'], image_title = np['image_title'], image_credit =np['image_credit'])
    session.add(park)

In [17]:
for act in activities_info:
    activity = Activity(id= act['id'],name= act['name'])
    session.add(activity)

In [18]:
for pa in park_activity_info:
    park_activity = ParkActivities(id = pa['activityId'], parkCode = pa['parkCode'])
    session.add(park_activity)

In [19]:
# Use the Inspector to explore the database and print the table names
tables = inspector.get_table_names()
tables

['activity', 'park', 'parkActivities', 'parkStats']

In [20]:
# park_list = session.query(Park)
# for park in park_list:
#     print(park.name)

In [21]:
# pa_list = session.query(ParkActivities)
# for pa in pa_list:
#     print(pa.id)

In [22]:
# activity_list = session.query(Activity)
# for activity in activity_list:
#     print(activity.name)

- Pull in visitor statistics from csv files; clean the data (park names have type of park code appended to it)
- for each year, search the parks db using the name, find the parkcode and create a record for the park code, year and # of visitors.
- save to db


In [23]:
# set up needed lists for cleaning

In [24]:
headers = ["park_name", "visitors"]
park_visit_stats = []
park_types = ['NRRA','IHS','MEM PKWY','NPRES','PRES','NRES','NMP','RES','NMEM','PKWY', 'NP', 'NSR', 'NS', 'NM', 'NHS','MEM','NBP', 'BP', 'NRA', 'NHL','NHP','EHP','HS','NL','NB']

In [25]:
# 2020

In [26]:
inputFile = "Resources/Visitation By State and By Park (2020).csv"
np_stats_df = pd.read_csv(inputFile)

parkStats_data  = [np_stats_df["Field1"], np_stats_df["Field2"]]
year_stats = pd.concat(parkStats_data, axis=1, keys=headers)

for ptype in park_types:
    repStr1 = "& " + ptype
    repStr2 = " " + ptype
    year_stats["park_name"] = year_stats["park_name"].str.replace(repStr1, "")
    year_stats["park_name"] = year_stats["park_name"].str.replace(repStr2, "")
    year_stats["park_name"] = year_stats["park_name"].str.strip()
    
# loop thru the df; take the name and select the park from the park table where df.park_name like name
# get the parkCode and the visitor count and append to a new table
for i in range(len(year_stats)) : 
    parkName = year_stats.loc[i, "park_name"]
    parkVisitors = year_stats.loc[i, "visitors"]
    #print(f"{parkName}: {parkVisitors}")
    result = session.query(Park)\
    .filter(Park.name == parkName)
    res = result.first()    
    if res:
        stats_info = {
            "parkCode":result[0].parkCode,
            "visitors": parkVisitors,
            "year":"2020"   
            } 
        park_visit_stats.append(stats_info) 

In [27]:
len(park_visit_stats)

350

In [28]:
#stats_2019

In [29]:
inputFile = "Resources/Visitation By State and By Park (2019).csv"
np_stats_df = pd.read_csv(inputFile)

parkStats_data  = [np_stats_df["Field1"], np_stats_df["Field2"]]
year_stats = pd.concat(parkStats_data, axis=1, keys=headers)

for ptype in park_types:
    repStr1 = "& " + ptype
    repStr2 = " " + ptype
    year_stats["park_name"] = year_stats["park_name"].str.replace(repStr1, "")
    year_stats["park_name"] = year_stats["park_name"].str.replace(repStr2, "")
    year_stats["park_name"] = year_stats["park_name"].str.strip()
    
for i in range(len(year_stats)) : 
    parkName = year_stats.loc[i, "park_name"]
    parkVisitors = year_stats.loc[i, "visitors"]
    #print(f"{parkName}: {parkVisitors}")
    result = session.query(Park)\
    .filter(Park.name == parkName)
    res = result.first()    
    if res:
        stats_info = {
            "parkCode":result[0].parkCode,
            "visitors": parkVisitors,
            "year":"2019"   
            } 
        park_visit_stats.append(stats_info) 

In [30]:
len(park_visit_stats)

695

In [31]:
#2018

In [32]:
inputFile = "Resources/Visitation By State and By Park (2018).csv"
np_stats_df = pd.read_csv(inputFile)

parkStats_data  = [np_stats_df["Field1"], np_stats_df["Field2"]]
year_stats = pd.concat(parkStats_data, axis=1, keys=headers)

for ptype in park_types:
    repStr1 = "& " + ptype
    repStr2 = " " + ptype
    year_stats["park_name"] = year_stats["park_name"].str.replace(repStr1, "")
    year_stats["park_name"] = year_stats["park_name"].str.replace(repStr2, "")
    year_stats["park_name"] = year_stats["park_name"].str.strip()
    
for i in range(len(year_stats)) : 
    parkName = year_stats.loc[i, "park_name"]
    parkVisitors = year_stats.loc[i, "visitors"]
    #print(f"{parkName}: {parkVisitors}")
    result = session.query(Park)\
    .filter(Park.name == parkName)
    res = result.first()    
    if res:
        stats_info = {
            "parkCode":result[0].parkCode,
            "visitors": parkVisitors,
            "year":"2018"   
            } 
        park_visit_stats.append(stats_info) 

In [33]:
len(park_visit_stats)


1040

In [34]:
inputFile = "Resources/Visitation By State and By Park (2017).csv"
np_stats_df = pd.read_csv(inputFile)

parkStats_data  = [np_stats_df["Field1"], np_stats_df["Field2"]]
year_stats = pd.concat(parkStats_data, axis=1, keys=headers)

for ptype in park_types:
    repStr1 = "& " + ptype
    repStr2 = " " + ptype
    year_stats["park_name"] = year_stats["park_name"].str.replace(repStr1, "")
    year_stats["park_name"] = year_stats["park_name"].str.replace(repStr2, "")
    year_stats["park_name"] = year_stats["park_name"].str.strip()
    
for i in range(len(year_stats)) : 
    parkName = year_stats.loc[i, "park_name"]
    parkVisitors = year_stats.loc[i, "visitors"]
    #print(f"{parkName}: {parkVisitors}")
    result = session.query(Park)\
    .filter(Park.name == parkName)
    res = result.first()    
    if res:
        stats_info = {
            "parkCode":result[0].parkCode,
            "visitors": parkVisitors,
            "year":"2017"   
            } 
        park_visit_stats.append(stats_info) 

In [35]:
len(park_visit_stats)


1385

In [36]:
i = 0
for stat in park_visit_stats:
    park_stat = ParkStats(id = i, parkCode = stat['parkCode'], visitors = stat['visitors'], year = stat['year'])
    session.add(park_stat)
    i= i + 1

In [37]:
# stats_list = session.query(ParkStats)
# for stat in stats_list:
#     print(stat.id," ",stat.parkCode," ", stat.visitors," ",stat.year)

0   hobe   28,600   2020
1   liri   802,375   2020
2   natt   428,736   2020
3   ruca   8,462   2020
4   tuai   6,843   2020
5   tuin   4,737   2020
6   ania   36   2020
7   bela   2,642   2020
8   cakr   11,369   2020
9   dena   54,850   2020
10   glba   5,748   2020
11   katm   51,511   2020
12   kefj   115,882   2020
13   kova   11,185   2020
14   lacl   4,948   2020
15   noat   12,533   2020
16   sitk   115,494   2020
17   npsa   4,819   2020
18   cach   76,752   2020
19   cagr   44,269   2020
20   chir   44,794   2020
21   coro   156,199   2020
22   fobo   6,456   2020
23   glca   357,804   2020
24   grca   2,897,098   2020
25   hutr   11,407   2020
26   lake   2,004,127   2020
27   moca   242,028   2020
28   nava   3,985   2020
29   orpi   191,512   2020
30   pefo   384,483   2020
31   pisp   10,623   2020
32   sagu   762,226   2020
33   sucr   83,669   2020
34   tont   24,756   2020
35   tuzi   78,358   2020
36   waca   107,422   2020
37   wupa   146,074   2020
38   arpo   29,65

396   depo   147,864   2019
397   euon   2,944   2019
398   fopo   1,421,349   2019
399   goga   15,002,227   2019
400   jomu   40,725   2019
401   jotr   2,988,547   2019
402   lavo   517,039   2019
403   labe   109,630   2019
404   manz   97,380   2019
405   moja   841,516   2019
406   muwo   812,073   2019
407   pinn   177,224   2019
408   pore   2,265,301   2019
409   poch   830   2019
410   redw   504,722   2019
411   safr   4,016,598   2019
412   samo   707,566   2019
413   whis   687,159   2019
414   yose   4,422,861   2019
415   beol   21,674   2019
416   colm   397,032   2019
417   cure   836,034   2019
418   dino   221,235   2019
419   flfo   77,340   2019
420   grsa   527,546   2019
421   hove   15,577   2019
422   meve   556,203   2019
423   romo   4,670,053   2019
424   sand   5,701   2019
425   wefa   38,700   2019
426   bepa   9,913   2019
427   cawo   2,381   2019
428   choh   1,228,029   2019
429   foth   572,373   2019
430   frdo   61,063   2019
431   kowa   3,841,633

703   cakr   15,087   2018
704   dena   594,660   2018
705   glba   597,915   2018
706   katm   37,818   2018
707   kefj   321,596   2018
708   kova   14,937   2018
709   lacl   14,479   2018
710   noat   16,387   2018
711   sitk   197,017   2018
712   npsa   28,626   2018
713   cach   439,306   2018
714   cagr   62,995   2018
715   chir   60,577   2018
716   coro   103,218   2018
717   fobo   8,401   2018
718   glca   865,713   2018
719   grca   6,380,495   2018
720   hutr   39,361   2018
721   lake   1,894,740   2018
722   moca   390,151   2018
723   nava   61,195   2018
724   orpi   260,375   2018
725   pefo   644,922   2018
726   pisp   25,179   2018
727   sagu   957,405   2018
728   sucr   104,583   2018
729   tont   39,822   2018
730   tuzi   98,090   2018
731   waca   167,736   2018
732   wupa   205,122   2018
733   arpo   30,126   2018
734   fosm   136,568   2018
735   hosp   1,506,887   2018
736   chsc   149,098   2018
737   peri   101,969   2018
738   cabr   842,104   2018
73

1106   colm   375,035   2017
1107   cure   1,041,446   2017
1108   dino   233,735   2017
1109   flfo   71,763   2017
1110   grsa   486,935   2017
1111   hove   17,588   2017
1112   meve   613,788   2017
1113   romo   4,437,215   2017
1114   sand   6,535   2017
1115   wefa   38,096   2017
1116   bepa   10,893   2017
1117   cawo   1,884   2017
1118   choh   1,166,297   2017
1119   foth   744,266   2017
1120   frdo   76,569   2017
1121   kowa   4,155,947   2017
1122   linc   7,956,117   2017
1123   mlkm   3,651,093   2017
1124   mamc   277   2017
1125   paav   108,716   2017
1126   vive   5,072,589   2017
1127   wamo   0   2017
1128   wwii   4,876,842   2017
1129   bicy   922,883   2017
1130   bisc   446,961   2017
1131   cana   1,598,586   2017
1132   casa   876,975   2017
1133   deso   233,354   2017
1134   drto   54,281   2017
1135   ever   1,018,557   2017
1136   foma   578,981   2017
1137   guis   3,104,613   2017
1138   timu   1,218,306   2017
1139   ande   132,149   2017
1140   cha