# Github repository: national-parks


In [1]:
# Imports needed on every run (standard library first, then third-party).
import os

import numpy as np
import pandas as pd

# Master switch: when True, the cells guarded by this flag re-download the
# source data and rewrite the cached CSV files; when False they are skipped.
REFRESH_DATA = False

if REFRESH_DATA:
    # Scraping-only dependencies, imported lazily so a cached run does not
    # require them to be installed.
    import time

    import requests
    from bs4 import BeautifulSoup

    # Browser-like User-Agent passed with each HTTP request made below.
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
    headers = {'User-Agent': user_agent}

In [2]:
if REFRESH_DATA:
    # Build the shared `http` session used by the scraping cells below.
    # Requests sent through it are retried automatically by urllib3.
    from requests.adapters import HTTPAdapter
    # Import Retry from urllib3 directly; `requests.packages.urllib3` is only a
    # backwards-compatibility alias for the same module.
    from urllib3.util.retry import Retry

    # Up to 3 retries, with backoff between attempts, for idempotent methods
    # when the server answers with one of the listed status codes.
    retry_strategy = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["HEAD", "GET", "OPTIONS"],
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)

    # Mount the retrying adapter for both schemes so every URL goes through it.
    http = requests.Session()
    http.mount("https://", adapter)
    http.mount("http://", adapter)

In [81]:
if REFRESH_DATA:
    # Download the unit-designation list, keep only units whose designation
    # code is 'NP', and cache the resulting code -> name table as
    # national_parks.csv (also left in `national_parks_df` for the next cell).

    park_units_url = 'https://irmaservices.nps.gov/v2/rest/unit/designations'
    park_units_namespace = {'root': 'NRPC.IrmaServices.Rest.Unit'}  # not referenced in this cell
    # Unit codes reported by the service that are replaced by the mapped code
    # before being stored (presumably so they match the codes the visits
    # report expects -- TODO confirm).
    park_unit_exceptions = {'DENG':'DENA', 'GAAG':'GAAR', 'GLBG':'GLBA', 'GRDG':'GRSA', 'KATG':'KATM', 'LACG':'LACL', 'WRSG':'WRST'}

    national_parks = {}
    # Same timeout as the visits requests so a stalled server cannot hang the run.
    r = http.get(park_units_url, headers=headers, timeout=5)
    # Fail loudly on an HTTP error instead of parsing an error page into an
    # empty result and overwriting the cached CSV with it.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "xml")

    for unit_designation in soup.find_all('UnitDesignation'):
        designation_code = unit_designation.find("Code")
        if designation_code is None or designation_code.text != 'NP':
            continue
        units = unit_designation.find("Units")
        if units is None:
            # Designation without a unit list: nothing to record.
            continue
        for value in units.find_all('Value'):
            raw_code = value.find("Code").text
            name = value.find("Name").text
            # Use the remapped code when one is defined, otherwise the raw code.
            code = park_unit_exceptions.get(raw_code, raw_code)
            national_parks[code] = name

    if not national_parks:
        # Do not clobber a previously cached file with an empty table.
        raise ValueError('No NP units found in response from ' + park_units_url)

    national_parks_df = pd.DataFrame.from_dict(national_parks, orient='index', columns=['name'])
    national_parks_df.index.name = 'code'
    national_parks_df.to_csv('national_parks.csv')

In [82]:
if REFRESH_DATA:
    # For every park code in `national_parks_df`, fetch the "Recreation
    # Visitors By Month" report, reshape its table into a (Year, Month)-indexed
    # Series, and cache all parks side by side as national_park_visits.csv.
    from io import StringIO

    # Report home page: https://irma.nps.gov/STATS/
    park_visits_domain = 'https://irma.nps.gov'
    park_visits_url = '/STATS/SSRSReports/Park%20Specific%20Reports/Recreation%20Visitors%20By%20Month%20(1979%20-%20Last%20Calendar%20Year)'
    park_visits_qs = '?Park='
    # Only tables with more rows than this are treated as the visits table.
    target_table_min = 10

    # One stacked Series per park; combined into a single frame after the loop
    # so parks are aligned on the union of their (Year, Month) keys instead of
    # being truncated to whichever park happened to be processed first.
    visits_by_park = {}

    print('Processing:', end=" ")
    for park_code in national_parks_df.index:
        print(park_code, end=", ")

        # Step 1: the report page only embeds the real report in an iframe.
        park_visits_request = park_visits_domain + park_visits_url + park_visits_qs + park_code
        r = http.get(park_visits_request, headers=headers, timeout=5)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html")
        iframe = soup.find('iframe')
        if iframe is None or 'src' not in iframe.attrs:
            raise ValueError(f'No report iframe found for park {park_code}')
        park_visits_iframe = iframe.attrs['src']

        # Step 2: fetch the iframe target, which holds the actual tables.
        park_visits_request = park_visits_domain + park_visits_iframe
        r = http.get(park_visits_request, headers=headers, timeout=5)
        r.raise_for_status()

        # Wrap the markup in StringIO: passing literal HTML to read_html is deprecated.
        dfs = pd.read_html(StringIO(r.text), match="Year", skiprows=1)
        candidate_tables = [table for table in dfs if len(table) > target_table_min]
        if not candidate_tables:
            # Without this check the previous park's table would be silently
            # reused and stored under the current park's code.
            raise ValueError(f'No visits table with more than {target_table_min} rows found for park {park_code}')
        # As before, the last qualifying table wins.
        one_park_df = candidate_tables[-1]

        # The first remaining row carries the column labels; promote it to the header.
        new_header = one_park_df.iloc[0]
        one_park_df = one_park_df.iloc[1:].copy()
        one_park_df.columns = new_header

        one_park_df = one_park_df.fillna(0)
        # Drop the yearly 'Total' column when present; only monthly values are kept.
        one_park_df = one_park_df.drop(columns='Total', errors='ignore')

        # Rows are years and columns are months; stack() turns that into a
        # Series keyed by (Year, column label).
        one_park_srs = one_park_df.set_index('Year').stack()
        visits_by_park[park_code] = one_park_srs

    # Dict keys become the column names; missing combinations are filled with 0.
    park_visits_df = pd.concat(visits_by_park, axis=1).fillna(0)
    park_visits_df.index.names = ['Year', 'Month']
    park_visits_df.to_csv('national_park_visits.csv')


In [3]:
# Load the cached visits table; the first two CSV columns together form the
# row index. Then preview the first few rows.
park_visits_df = pd.read_csv(
    'national_park_visits.csv',
    index_col=[0, 1],
)
visits_preview = park_visits_df.head()
print(visits_preview)

              ACAD    ARCH    BADL   BIBE   BISC   BLCA    BRCA    CANY  \
Year Month                                                                
2021 JAN     20268   48725   14899  50298  25626   8759   37725   18837   
     FEB     16123   53986   11350  43889  19596   7287   35653   20164   
     MAR     38060  151074   28182  89915  30655   7236   96800   87001   
     APR    110096  193912   42749  67628  30835  14159  201771  116971   
     MAY    324654  225789  109764  53067  37128  34729  297905  134778   

              CARE   CAVE  ...    SHEN   THRO   VIIS   VOYA   WHSA   WICA  \
Year Month                 ...                                              
2021 JAN     20786  16219  ...   34195   6866  18736   4679  45053  14840   
     FEB     25718  13270  ...    9362   5162  19680   7887  35766  10836   
     MAR    115728  30749  ...   73499  15125  22302   1558  98118  16430   
     APR    165498  25859  ...  108358  11993  26531    104  78723  43241   
     MAY    