In [1]:
# Data science
import pandas as pd

# API
import requests
import json

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, text, inspect, func

In [2]:
# Read the CSV files
parks_df = pd.read_csv('Resources/parks.csv')
species_df = pd.read_csv('Resources/species.csv')

In [3]:
parks_df.head()

Unnamed: 0,Park Code,Park Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [4]:
# Rename the columns on the dataframe
parks_df.rename(columns={
    'Park Code': 'Park_Code',
    'Park Name': 'Park_Name'
}, inplace=True)

parks_df.head()

Unnamed: 0,Park_Code,Park_Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [5]:
species_df.head()

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Record Status,Occurrence,Nativeness,Abundance,Seasonality,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,Approved,Present,Native,Rare,Resident,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",Approved,Present,Native,Abundant,,
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Approved,Present,Not Native,Common,,Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Approved,Not Confirmed,Native,,,Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",Approved,Present,Unknown,Common,Breeder,


In [6]:
#drop_list = ["Record Status", "Occurrence", "Nativeness", "Abundance", "Seasonality"]

In [7]:
species_df.drop(["Record Status", "Occurrence", "Nativeness", "Abundance", "Seasonality"], axis=1, inplace=True)

In [8]:
species_df.head()

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",


In [9]:
# Rename the columns on the dataframe

species_df.rename(columns={
    'Species ID': 'Species_ID',
    'Park Name': 'Park_Name',
    'Scientific Name': 'Scientific_Name',
    'Common Names': 'Common_Names',
    'Conservation Status': 'Conservation_Status'
}, inplace=True)

species_df.head()

Unnamed: 0,Species_ID,Park_Name,Category,Order,Family,Scientific_Name,Common_Names,Conservation_Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",


In [10]:
parks_df.to_json('Resources/parks.json', orient='records', lines=True)
species_df.to_json('Resources/species.json', orient='records', lines=True)

In [12]:
merged_df = pd.merge(parks_df, species_df, on="Park_Name")
merged_df.head()

Unnamed: 0,Park_Code,Park_Name,State,Acres,Latitude,Longitude,Species_ID,Category,Order,Family,Scientific_Name,Common_Names,Conservation_Status
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21,ACAD-1000,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,
1,ACAD,Acadia National Park,ME,47390,44.35,-68.21,ACAD-1001,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",
2,ACAD,Acadia National Park,ME,47390,44.35,-68.21,ACAD-1002,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Species of Concern
3,ACAD,Acadia National Park,ME,47390,44.35,-68.21,ACAD-1003,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Endangered
4,ACAD,Acadia National Park,ME,47390,44.35,-68.21,ACAD-1004,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",


In [13]:
df_split = merged_df.assign(State=merged_df["State"].str.split(", ")).explode("State")
df_split = df_split.reset_index(drop=True)

df_split.head()

Unnamed: 0,Park_Code,Park_Name,State,Acres,Latitude,Longitude,Species_ID,Category,Order,Family,Scientific_Name,Common_Names,Conservation_Status
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21,ACAD-1000,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,
1,ACAD,Acadia National Park,ME,47390,44.35,-68.21,ACAD-1001,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",
2,ACAD,Acadia National Park,ME,47390,44.35,-68.21,ACAD-1002,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Species of Concern
3,ACAD,Acadia National Park,ME,47390,44.35,-68.21,ACAD-1003,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Endangered
4,ACAD,Acadia National Park,ME,47390,44.35,-68.21,ACAD-1004,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",


In [14]:
df_split.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 138242 entries, 0 to 138241
Data columns (total 13 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   Park_Code            138242 non-null  object 
 1   Park_Name            138242 non-null  object 
 2   State                138242 non-null  object 
 3   Acres                138242 non-null  int64  
 4   Latitude             138242 non-null  float64
 5   Longitude            138242 non-null  float64
 6   Species_ID           138242 non-null  object 
 7   Category             138242 non-null  object 
 8   Order                136478 non-null  object 
 9   Family               136455 non-null  object 
 10  Scientific_Name      138242 non-null  object 
 11  Common_Names         102299 non-null  object 
 12  Conservation_Status  5294 non-null    object 
dtypes: float64(2), int64(1), object(10)
memory usage: 13.7+ MB


In [29]:
# Get state values for dashboard filter
print(df_split["State"].nunique())
df_split.State.sort_values().unique()

27


array(['AK', 'AR', 'AZ', 'CA', 'CO', 'FL', 'HI', 'ID', 'KY', 'ME', 'MI',
       'MN', 'MT', 'NC', 'ND', 'NM', 'NV', 'OH', 'OR', 'SC', 'SD', 'TN',
       'TX', 'UT', 'VA', 'WA', 'WY'], dtype=object)

In [17]:
# Check number of parks
print(df_split["Park_Name"].nunique())
df_split["Park_Name"].unique()

56


array(['Acadia National Park', 'Arches National Park',
       'Badlands National Park', 'Big Bend National Park',
       'Biscayne National Park',
       'Black Canyon of the Gunnison National Park',
       'Bryce Canyon National Park', 'Canyonlands National Park',
       'Capitol Reef National Park', 'Carlsbad Caverns National Park',
       'Channel Islands National Park', 'Congaree National Park',
       'Crater Lake National Park', 'Cuyahoga Valley National Park',
       'Denali National Park and Preserve', 'Death Valley National Park',
       'Dry Tortugas National Park', 'Everglades National Park',
       'Gates Of The Arctic National Park and Preserve',
       'Glacier National Park', 'Glacier Bay National Park and Preserve',
       'Great Basin National Park', 'Grand Canyon National Park',
       'Great Sand Dunes National Park and Preserve',
       'Great Smoky Mountains National Park', 'Grand Teton National Park',
       'Guadalupe Mountains National Park', 'Haleakala National

In [18]:
# Confirm that states are split
mask = df_split.loc[df_split["Park_Name"] == "Yellowstone National Park"]
print(mask)

       Park_Code                  Park_Name State    Acres  Latitude  \
122460      YELL  Yellowstone National Park    WY  2219791      44.6   
122461      YELL  Yellowstone National Park    MT  2219791      44.6   
122462      YELL  Yellowstone National Park    ID  2219791      44.6   
122463      YELL  Yellowstone National Park    WY  2219791      44.6   
122464      YELL  Yellowstone National Park    MT  2219791      44.6   
...          ...                        ...   ...      ...       ...   
134353      YELL  Yellowstone National Park    MT  2219791      44.6   
134354      YELL  Yellowstone National Park    ID  2219791      44.6   
134355      YELL  Yellowstone National Park    WY  2219791      44.6   
134356      YELL  Yellowstone National Park    MT  2219791      44.6   
134357      YELL  Yellowstone National Park    ID  2219791      44.6   

        Longitude Species_ID Category             Order             Family  \
122460     -110.5  YELL-1000   Mammal      Artiodactyla  

In [19]:
# Create engine for conection to the SQLite
engine = create_engine('sqlite:///national_parks.sqlite')

# Save the dataframes to the database
parks_df.to_sql('parks', engine, index=False, if_exists='replace')
species_df.to_sql('species', engine, index=False, if_exists='replace')
df_split.to_sql('all', engine, index=False, if_exists='replace')

138242

In [20]:
# INSPECT to confirm existence

# Create the inspector and connect it to the engine
inspector_gadget = inspect(engine)

# Collect the names of tables within the database
tables = inspector_gadget.get_table_names()

# print metadata for each table
for table in tables:
    print(table)
    print("-----------")
    
    # get columns
    columns = inspector_gadget.get_columns(table)
    for column in columns:
        print(column["name"], column["type"])

    print()

all
-----------
Park_Code TEXT
Park_Name TEXT
State TEXT
Acres BIGINT
Latitude FLOAT
Longitude FLOAT
Species_ID TEXT
Category TEXT
Order TEXT
Family TEXT
Scientific_Name TEXT
Common_Names TEXT
Conservation_Status TEXT

parks
-----------
Park_Code TEXT
Park_Name TEXT
State TEXT
Acres BIGINT
Latitude FLOAT
Longitude FLOAT

species
-----------
Species_ID TEXT
Park_Name TEXT
Category TEXT
Order TEXT
Family TEXT
Scientific_Name TEXT
Common_Names TEXT
Conservation_Status TEXT



In [21]:
# Read the data from the database
with engine.connect() as connection:
    parks_table = pd.read_sql('SELECT * FROM parks', connection)
    species_table = pd.read_sql('SELECT * FROM species', connection)

parks_table.head()

Unnamed: 0,Park_Code,Park_Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [22]:
species_table.head()

Unnamed: 0,Species_ID,Park_Name,Category,Order,Family,Scientific_Name,Common_Names,Conservation_Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",


In [23]:
print(parks_table["Park_Name"].nunique())
parks_table["Park_Name"].unique()

56


array(['Acadia National Park', 'Arches National Park',
       'Badlands National Park', 'Big Bend National Park',
       'Biscayne National Park',
       'Black Canyon of the Gunnison National Park',
       'Bryce Canyon National Park', 'Canyonlands National Park',
       'Capitol Reef National Park', 'Carlsbad Caverns National Park',
       'Channel Islands National Park', 'Congaree National Park',
       'Crater Lake National Park', 'Cuyahoga Valley National Park',
       'Denali National Park and Preserve', 'Death Valley National Park',
       'Dry Tortugas National Park', 'Everglades National Park',
       'Gates Of The Arctic National Park and Preserve',
       'Glacier National Park', 'Glacier Bay National Park and Preserve',
       'Great Basin National Park', 'Grand Canyon National Park',
       'Great Sand Dunes National Park and Preserve',
       'Great Smoky Mountains National Park', 'Grand Teton National Park',
       'Guadalupe Mountains National Park', 'Haleakala National

In [24]:
# Get the number of endangered species per state park for bar chart

query = """
    SELECT 
        p.State,
        p.Park_Name,
        COUNT(s.Park_Name) AS Endangered_Species_Count
    FROM 
        parks p
    LEFT JOIN 
        species s
    ON 
        p.Park_Name = s.Park_Name
    AND 
        s.Conservation_Status = 'Endangered'
    GROUP BY 
        p.State,
        p.Park_Name
    HAVING 
        COUNT(s.Park_Name) > 0
    ORDER BY 
        p.State,
        p.Park_Name;
"""

table_df = pd.read_sql(text(query), con=engine)
table_df.head()

Unnamed: 0,State,Park_Name,Endangered_Species_Count
0,AK,Glacier Bay National Park and Preserve,9
1,AK,Katmai National Park and Preserve,6
2,AK,Kenai Fjords National Park,8
3,AK,Lake Clark National Park and Preserve,3
4,AK,Wrangell - St Elias National Park and Preserve,5


In [25]:
# Get the size of parks per state to make a stacked bar

query2 = """
    SELECT 
        State,
        Park_Name,
        Acres
    FROM 
        parks
    GROUP BY 
        State,
        Park_Name
    ORDER BY 
        State,
        Park_Name;
"""

table_df2 = pd.read_sql(text(query2), con=engine)
table_df2.head()

Unnamed: 0,State,Park_Name,Acres
0,AK,Denali National Park and Preserve,3372402
1,AK,Gates Of The Arctic National Park and Preserve,7523898
2,AK,Glacier Bay National Park and Preserve,3224840
3,AK,Katmai National Park and Preserve,3674530
4,AK,Kenai Fjords National Park,669983


In [26]:
# Parks per State

query3 = """
    SELECT 
        State,
        COUNT(Park_Name) AS Number_of_Parks
    FROM 
        parks
    GROUP BY 
        State
    ORDER BY 
        State;
"""
table_df3 = pd.read_sql(text(query3), con=engine)
table_df3.head()

Unnamed: 0,State,Number_of_Parks
0,AK,8
1,AR,1
2,AZ,3
3,CA,7
4,"CA, NV",1


In [27]:
# number of endangered species per state park related to the size of the park for bubble chart
query4 = """
    SELECT 
        p.State,
        p.Park_Name,
        p.Acres,
        COUNT(s.Park_Name) AS Endangered_Species_Count
    FROM 
        parks p
    LEFT JOIN 
        species s
    ON 
        p.Park_Name = s.Park_Name
    AND 
        s.Conservation_Status = 'Endangered'
    GROUP BY 
        p.State,
        p.Park_Name,
        p.Acres
    HAVING 
        COUNT(s.Park_Name) > 0
    ORDER BY 
        p.State,
        p.Park_Name;
"""

table_df4 = pd.read_sql(text(query4), con=engine)
table_df4.head()

Unnamed: 0,State,Park_Name,Acres,Endangered_Species_Count
0,AK,Glacier Bay National Park and Preserve,3224840,9
1,AK,Katmai National Park and Preserve,3674530,6
2,AK,Kenai Fjords National Park,669983,8
3,AK,Lake Clark National Park and Preserve,2619733,3
4,AK,Wrangell - St Elias National Park and Preserve,8323148,5


In [28]:
# Get the map info
query5 = """
    SELECT 
        p.Park_Name,
        p.State,
        p.Latitude,
        p.Longitude,
        COUNT(s.Park_Name) AS Endangered_Species_Count
    FROM 
        parks p
    LEFT JOIN 
        species s
    ON 
        p.Park_Name = s.Park_Name
    AND 
        s.Conservation_Status = 'Endangered'
    GROUP BY 
        p.Park_Name,
        p.State,
        p.Latitude,
        p.Longitude
    ORDER BY 
        p.State,
        p.Park_Name;
    """
table_df5 = pd.read_sql(text(query5), con=engine)
table_df5.head()

Unnamed: 0,Park_Name,State,Latitude,Longitude,Endangered_Species_Count
0,Denali National Park and Preserve,AK,63.33,-150.5,0
1,Gates Of The Arctic National Park and Preserve,AK,67.78,-153.3,0
2,Glacier Bay National Park and Preserve,AK,58.5,-137.0,9
3,Katmai National Park and Preserve,AK,58.5,-155.0,6
4,Kenai Fjords National Park,AK,59.92,-149.65,8


In [None]:
# engine.dispose()