In [1]:
import pandas as pd
import re

In [2]:
# Load the national-park summary table.
np_data = pd.read_csv(filepath_or_buffer='../data/np_data.csv')

In [3]:
# Peek at the first row to see which columns are available.
np_data.head(n=1)

Unnamed: 0,park_name,date_established,description,area_acres,area_km2,state,recreational_visits,non_recreational_visits,recreational_hours,non_recreational_hours,tent_overnights,rv_overnights,backcountry_overnights
0,Acadia,1919-02-26,Covering most of Mount Desert Island and other...,49071.4,198.6,Maine,3879890.0,47100.0,25804425.0,47100.0,120615.0,49074.0,1570.0


In [4]:
# Park names, in the order they appear in np_data.
parks = np_data['park_name'].tolist()

In [5]:
# Preview the first five park names.
parks[:5]

['Acadia', 'American Samoa', 'Arches', 'Badlands', 'Big Bend']

In [6]:
# Skip the first park (Acadia): its species file is loaded on its own below.
# Slicing from np_data rather than from `parks` itself keeps this cell
# idempotent -- re-running it no longer drops one more park each time.
parks = np_data.park_name.to_list()[1:]
parks[0:5]

['American Samoa', 'Arches', 'Badlands', 'Big Bend', 'Biscayne']

In [7]:
# Load the Acadia species list.
acadia = pd.read_csv(
    filepath_or_buffer='../data/national-park-service/np-species/acadia.csv'
)

In [8]:
# Discard the taxonomy / bookkeeping columns that are not used below.
acadia = acadia.drop(
    [
        'Park Code',
        'Category Sort',
        'Order',
        'Family',
        'Taxon Code',
        'TSN',
        'Taxon Record Status',
        'Scientific Name',
        'Synonyms',
    ],
    axis='columns',
)

In [9]:
# Switch the remaining column labels to snake_case.
acadia = acadia.rename(
    {
        'Park Name': 'park_name',
        'Category': 'category',
        'Common Names': 'common_names',
        'Occurrence': 'occurrence',
    },
    axis='columns',
)

In [10]:
# Show the Acadia species table after dropping and renaming columns.
acadia

Unnamed: 0,park_name,category,common_names,occurrence
0,Acadia National Park,Mammal,Moose,Present
1,Acadia National Park,Mammal,"Northern White-tailed Deer, Virginia Deer, Whi...",Present
2,Acadia National Park,Mammal,"Coyote, Eastern Coyote",Present
3,Acadia National Park,Mammal,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",Present
4,Acadia National Park,Mammal,"Bay Lynx, Bobcat, Red Lynx, Wild Cat",Present
...,...,...,...,...
1165,Acadia National Park,Vascular Plant,Hedge Bindweed,Present
1166,Acadia National Park,Vascular Plant,"Common Dodder, Love Vine",Present
1167,Acadia National Park,Vascular Plant,Deadly Nightshade,Present
1168,Acadia National Park,Vascular Plant,Buffalo Bur,Present


In [11]:
# Try the suffix-stripping pattern on the row labelled 0 before applying it
# to the whole column.
first_park_name = acadia.loc[0, 'park_name']
re.findall(r'(^.*) National Park', first_park_name)

['Acadia']

In [12]:
# Keep only the text before " National Park". Names that do not contain the
# suffix (including already-shortened names when this cell is re-run) are left
# unchanged instead of raising IndexError.
acadia['park_name'] = (
    acadia['park_name']
    .str.extract(r'^(.*) National Park', expand=False)
    .fillna(acadia['park_name'])
)

In [13]:
# Keep only the rows whose occurrence is exactly 'Present'.
is_present = acadia['occurrence'].eq('Present')
acadia = acadia[is_present]

In [14]:
# Every remaining row has occurrence == 'Present', so the column is redundant.
acadia = acadia.drop('occurrence', axis='columns')

In [15]:
# Show the cleaned Acadia table: short park name, category and common names.
acadia

Unnamed: 0,park_name,category,common_names
0,Acadia,Mammal,Moose
1,Acadia,Mammal,"Northern White-tailed Deer, Virginia Deer, Whi..."
2,Acadia,Mammal,"Coyote, Eastern Coyote"
3,Acadia,Mammal,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re..."
4,Acadia,Mammal,"Bay Lynx, Bobcat, Red Lynx, Wild Cat"
...,...,...,...
1165,Acadia,Vascular Plant,Hedge Bindweed
1166,Acadia,Vascular Plant,"Common Dodder, Love Vine"
1167,Acadia,Vascular Plant,Deadly Nightshade
1168,Acadia,Vascular Plant,Buffalo Bur


In [16]:
def speciesDf(base_df = acadia):
    """Combine the per-park species CSV files into one DataFrame.

    For every name in the module-level ``parks`` list, the file
    ``../data/national-park-service/np-species/<lower-cased name>.csv`` is
    read (latin-1 encoded), cleaned the same way the Acadia frame was
    cleaned by hand above, and stacked underneath ``base_df``.

    Cleaning steps applied to each file:
      * drop the taxonomy / bookkeeping columns,
      * rename the remaining columns to snake_case,
      * shorten ``park_name`` to the text before " National Park"
        (names without that suffix are kept as they are),
      * keep only rows whose ``occurrence`` is 'Present', then drop that column.

    A line is printed per park: ``success --<park>.csv--`` when the file was
    read, ``FAILED  --<park>.csv--`` when it was not. Parks whose file cannot
    be read are skipped.

    Parameters
    ----------
    base_df : pandas.DataFrame
        Frame placed first in the result. The default is the ``acadia``
        object that existed when this function was defined.

    Returns
    -------
    pandas.DataFrame
        ``base_df`` followed by the cleaned rows of every readable park file.
        Row labels are not reset. If no park file could be read, ``base_df``
        itself is returned.
    """
    dropped_columns = ['Park Code', 'Category Sort', 'Order',
                       'Family', 'Taxon Code', 'TSN',
                       'Taxon Record Status', 'Scientific Name',
                       'Synonyms']
    renamed_columns = {'Park Name': 'park_name',
                       'Category': 'category',
                       'Common Names': 'common_names',
                       'Occurrence': 'occurrence'}

    # Collect the frames and concatenate once at the end; concatenating inside
    # the loop re-copies the ever-growing result on every iteration.
    frames = [base_df]
    for park in parks:
        park = park.lower()
        path = f'../data/national-park-service/np-species/{park}.csv'
        try:
            df = pd.read_csv(path, encoding='latin-1')
        except FileNotFoundError:
            # No species file for this park.
            print(f'FAILED  --{park}.csv--')
            continue
        except Exception as error:
            # Still best-effort, but say why so that an unreadable or malformed
            # file is not mistaken for a missing one.
            print(f'FAILED  --{park}.csv-- ({type(error).__name__}: {error})')
            continue
        print(f'success --{park}.csv--')

        df = df.drop(columns=dropped_columns).rename(columns=renamed_columns)

        # Vectorised suffix strip; rows that do not match the pattern come back
        # as NaN from extract and fall back to their original name rather than
        # raising IndexError and aborting the whole run.
        short_name = df['park_name'].str.extract(r'^(.*) National Park', expand=False)
        df['park_name'] = short_name.fillna(df['park_name'])

        df = df.loc[df.occurrence == 'Present'].drop(columns='occurrence')
        frames.append(df)

    if len(frames) == 1:
        # Nothing was loaded: hand back the base frame untouched.
        return base_df
    return pd.concat(frames)

In [17]:
biodiversity = speciesDf() # Parks reported as FAILED have no species data: their CSV files do not exist.

FAILED  --american samoa.csv--
success --arches.csv--
success --badlands.csv--
success --big bend.csv--
success --biscayne.csv--
success --black canyon of the gunnison.csv--
success --bryce canyon.csv--
success --canyonlands.csv--
success --capitol reef.csv--
success --carlsbad caverns.csv--
success --channel islands.csv--
success --congaree.csv--
success --crater lake.csv--
success --cuyahoga valley.csv--
success --death valley.csv--
success --denali.csv--
success --dry tortugas.csv--
success --everglades.csv--
success --gates of the arctic.csv--
FAILED  --gateway arch.csv--
success --glacier.csv--
success --glacier bay.csv--
success --grand canyon.csv--
success --grand teton.csv--
success --great basin.csv--
success --great sand dunes.csv--
success --great smoky mountains.csv--
success --guadalupe mountains.csv--
FAILED  --haleakalā.csv--
FAILED  --hawaiʻi volcanoes.csv--
success --hot springs.csv--
success --indiana dunes.csv--
success --isle royale.csv--
success --joshua tree.csv--

In [18]:
# Inspect the combined species table returned by speciesDf().
biodiversity

Unnamed: 0,park_name,category,common_names,Record Status,Occurrence Tags,Nativeness,Nativeness Tags,Abundance
0,Acadia,Mammal,Moose,,,,,
1,Acadia,Mammal,"Northern White-tailed Deer, Virginia Deer, Whi...",,,,,
2,Acadia,Mammal,"Coyote, Eastern Coyote",,,,,
3,Acadia,Mammal,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",,,,,
4,Acadia,Mammal,"Bay Lynx, Bobcat, Red Lynx, Wild Cat",,,,,
...,...,...,...,...,...,...,...,...
1433,Zion,Vascular Plant,cut-leaf nightshade,,,,,
1434,Zion,Vascular Plant,canyon grape,,,,,
1435,Zion,Vascular Plant,wine grape,,,,,
1436,Zion,Vascular Plant,creosote bush,,,,,


In [19]:
# Count the distinct parks present in the combined table.
biodiversity['park_name'].nunique()

56

In [20]:
# Remove the extra columns that came in with the concatenated park files.
biodiversity = biodiversity.drop(
    [
        'Record Status',
        'Occurrence Tags',
        'Nativeness',
        'Nativeness Tags',
        'Abundance',
    ],
    axis='columns',
)

In [21]:
# Re-display the combined table after the extra columns were dropped.
biodiversity

Unnamed: 0,park_name,category,common_names
0,Acadia,Mammal,Moose
1,Acadia,Mammal,"Northern White-tailed Deer, Virginia Deer, Whi..."
2,Acadia,Mammal,"Coyote, Eastern Coyote"
3,Acadia,Mammal,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re..."
4,Acadia,Mammal,"Bay Lynx, Bobcat, Red Lynx, Wild Cat"
...,...,...,...
1433,Zion,Vascular Plant,cut-leaf nightshade
1434,Zion,Vascular Plant,canyon grape
1435,Zion,Vascular Plant,wine grape
1436,Zion,Vascular Plant,creosote bush


In [22]:
#biodiversity.to_csv('../data/biodiversity.csv', index=False)