In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import re
import json

# Selenium setup: headless Chrome with a fixed desktop-sized viewport.
# "--headless=new" matches the flag used by the scraping cell further down.
options = Options()
options.add_argument("--headless=new")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")

service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

url = "https://irma.nps.gov/Stats/"

try:
    driver.get(url)

    # Wait until the dropdown trigger can actually be clicked (not merely
    # present in the DOM). The wait returns the element, so no second lookup
    # is needed before clicking it open.
    dropdown = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.ID, "editComboId-trigger-picker"))
    )
    dropdown.click()

    # Wait for the dropdown options to load
    item_locator = (By.XPATH, "//li[contains(@class, 'x-boundlist-item')]")
    WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located(item_locator)
    )

    # Extract all park names. Each `.text` access is a WebDriver round-trip,
    # so read it once per element and filter the strings afterwards.
    parks = driver.find_elements(*item_locator)
    item_texts = [park.text for park in parks]
    park_names = [text for text in item_texts if text.strip()]

    # Filter only National Parks (entries containing "NP ") and store them in
    # a dictionary. Key: the entry with its last space-separated token removed;
    # value: the text found inside parentheses in that entry.
    np_parks = {
        name.rsplit(' ', 1)[0]: abbr
        for name in park_names if "NP " in name
        for abbr in re.findall(r'\((.*?)\)', name)
    }

    # Remove Wolf Trap since it is not a National Park by definition
    np_parks.pop("Wolf Trap NP for the Performing Arts", None)

    # Add National Park of American Samoa as the "NP " filter above excludes it
    np_parks["National Park of American Samoa"] = "NPSA"

    # Save to a JSON file
    with open("np_parks.json", "w", encoding="utf-8") as json_file:
        json.dump(np_parks, json_file, indent=4)

    print("National Parks data saved to np_parks.json")

except Exception as e:
    # Best-effort cell: report the failure instead of raising.
    print("Error:", str(e))

finally:
    # Always shut the browser down, whether or not scraping succeeded.
    driver.quit()


National Parks data saved to np_parks.json


In [None]:
import json
import re
import time
from pathlib import Path

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

def beep():
    """Play a one-second 440 Hz tone, e.g. to signal that a long run finished.

    ``winsound`` only exists on Windows, so it is imported lazily. On other
    platforms the ASCII bell character is printed instead of letting the
    ImportError abort the cell.
    """
    duration = 1000  # milliseconds
    freq = 440  # Hz
    try:
        import winsound
    except ImportError:
        # Not on Windows: fall back to the terminal bell rather than crash.
        print("\a", end="", flush=True)
        return
    winsound.Beep(freq, duration)

# Load the np_parks.json file (park name -> park code). The encoding is given
# explicitly so the read does not depend on the platform's default encoding;
# the file is written as UTF-8.
with open('np_parks.json', encoding="utf-8") as f:
    park_codes = json.load(f)

# Month abbreviations, latest month first. get_dropdown_value() uses each
# month's position in this list, so the order must not be changed.
MONTHS = ["Dec", "Nov", "Oct", "Sep", "Aug", "Jul", "Jun", "May", "Apr", "Mar", "Feb", "Jan"]
# Years to scrape; get_dropdown_value() counts back from 2024, the last entry.
YEAR_RANGE = [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]

# List to store extracted data (one dict per park/month/year, filled by extract_data)
df_list = []

def setup_driver():
    """Create and return a headless Chrome WebDriver with a 1920x1080 window.

    The chromedriver binary is obtained via ``ChromeDriverManager().install()``.
    """
    chrome_options = Options()
    for flag in ("--headless=new", "--disable-gpu", "--window-size=1920,1080"):
        chrome_options.add_argument(flag)

    chromedriver_path = ChromeDriverManager().install()
    return webdriver.Chrome(service=Service(chromedriver_path), options=chrome_options)

def get_dropdown_value(month, year):
    """Return the report dropdown's option value for ``month``/``year`` as a string.

    The value is the month's 1-based position in ``MONTHS`` plus 12 for every
    year before 2024, so "Dec" 2024 gives "1" and "Dec" 2023 gives "13".
    NOTE(review): this assumes option 1 on the live page is Dec 2024 -- confirm
    against the dropdown, since the numbering may shift as months are added.
    """
    position_in_year = MONTHS.index(month) + 1
    years_back = 2024 - year
    return str(position_in_year + years_back * 12)

# Seconds to wait for a page element before giving up.
_ELEMENT_TIMEOUT = 10
# Seconds to pause after submitting the form so the report can re-render.
_REPORT_RELOAD_DELAY = 5
# A count as printed in the report: digits, optionally grouped with commas.
# ``\d+`` (rather than ``\d{1,3}``) keeps an ungrouped number such as "29101"
# from being silently cut down to its first three digits.
_COUNT = r"(\d+(?:,\d{3})*)"


def _parse_count(report_text, label_pattern, default=None):
    """Return the integer that follows ``label_pattern`` in ``report_text``, else ``default``."""
    match = re.search(label_pattern + _COUNT, report_text)
    if match is None:
        return default
    return int(match.group(1).replace(",", ""))


def extract_data(park_name, park_code, month, year):
    """Extract footfall and overnight-stay figures for a given park and month-year.

    Loads the park's Monthly Public Use report with the module-level ``driver``,
    selects the requested month in the report viewer, submits, and parses the
    rendered text. On success one record is appended to the module-level
    ``df_list``; nothing is returned.

    Args:
        park_name: Name stored in the "National Park" column of the record.
        park_code: Code placed in the ``Park=`` query parameter of the report URL.
        month: Month abbreviation; must be an entry of ``MONTHS``.
        year: Calendar year as an int.

    Any exception is caught and printed so that a single failing report does
    not stop the surrounding scraping loop. A report in which no "Total Visits"
    figure can be found is logged and skipped.
    """
    try:
        # Update the URL with the park code
        URL = f"https://irma.nps.gov/Stats/SSRSReports/Park%20Specific%20Reports/Monthly%20Public%20Use?Park={park_code}"
        driver.get(URL)

        wait = WebDriverWait(driver, _ELEMENT_TIMEOUT)

        # Wait for the iframe and switch into it; the wait returns the element,
        # so it does not have to be located a second time.
        iframe = wait.until(EC.presence_of_element_located((By.TAG_NAME, "iframe")))
        driver.switch_to.frame(iframe)

        # Get dropdown value and select month. The frame's document may still
        # be loading right after the switch, so wait for the dropdown instead
        # of looking it up immediately.
        dropdown_value = get_dropdown_value(month, year)
        month_select = wait.until(
            EC.presence_of_element_located((By.ID, "ReportViewer_ctl04_ctl05_ddValue")),
            message="month dropdown did not load",
        )
        Select(month_select).select_by_value(dropdown_value)

        # Click submit button
        driver.find_element(By.ID, "ReportViewer_ctl04_ctl00").click()

        # Wait for report to reload. This is a fixed pause.
        # NOTE(review): an explicit wait would be faster, but needs a reliable
        # marker that distinguishes the refreshed report from the previous one.
        time.sleep(_REPORT_RELOAD_DELAY)

        # Extract report data
        report_data = driver.find_element(By.TAG_NAME, "body").text

        # Extract footfall (Total Visits); without it there is nothing to record.
        footfall = _parse_count(report_data, r"Total\sVisits\s")
        if footfall is None:
            print(f"No Total Visits figure found for {park_name} {month} {year}; skipping")
            return

        # Overnight-stay categories default to 0 when absent from the report.
        df_list.append({
            "National Park": park_name,
            "Month": month,
            "Year": year,
            "Footfall": footfall,
            "Concessioner Lodging": _parse_count(report_data, r"Concessioner Lodging\s+", default=0),
            "NPS Campgrounds": _parse_count(report_data, r"NPS Campgrounds\s+", default=0),
            "NPS Backcountry": _parse_count(report_data, r"NPS Backcountry\s+", default=0),
            "NPS Miscellaneous": _parse_count(report_data, r"NPS Miscellaneous\s+", default=0),
        })

    except Exception as e:
        # Best-effort: report the failure and let the caller move on.
        print(f"Error processing {park_name} {month} {year}: {e}")

# Scrape every park / year / month combination with one shared browser.
driver = setup_driver()
try:
    for park_name, park_code in park_codes.items():
        for year in YEAR_RANGE:
            for month in MONTHS:
                extract_data(park_name, park_code, month, year)
finally:
    # Release the Chrome process even if the run is interrupted or fails part-way.
    driver.quit()

# Create DataFrame from collected data
df = pd.DataFrame(df_list)

# Save to CSV. Rows are appended so repeated runs accumulate in one file; the
# header row is written only when the file is being created, so a fresh file
# still carries its column names.
csv_path = Path("park_data_nothread.csv")
if df.empty:
    print("No data collected; nothing written to park_data_nothread.csv")
else:
    df.to_csv(csv_path, mode="a", header=not csv_path.exists(), index=False)

# Print first few rows
print(df.head())

beep()

        National Park Month  Year  Footfall  Concessioner Lodging  \
0  Channel Islands NP   May  2021     29101                     0   
1  Channel Islands NP   Jun  2021     41970                     0   
2  Channel Islands NP   Aug  2021     37996                     0   
3  Channel Islands NP   Apr  2021     23441                     0   
4  Channel Islands NP   Jul  2021     42397                     0   

   NPS Campgrounds  NPS Backcountry  NPS Miscellaneous  
0                0             2691               3528  
1                0             2776               4334  
2                0             3254               8374  
3                0             1832               2142  
4                0             3014               6566  
