In [1]:
# Data science
import pandas as pd

# API
import requests
import json

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, text, inspect, func

In [2]:
# Load the raw datasets: `df1` receives the parks table, `df` the species table
df1, df = (
    pd.read_csv(csv_path)
    for csv_path in ('Resources/parks.csv', 'Resources/species.csv')
)

In [3]:
# Preview the first five rows of the parks data
df1.head(n=5)

Unnamed: 0,Park Code,Park Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [4]:
# Preview the first five rows of the species data
df.head(n=5)

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Record Status,Occurrence,Nativeness,Abundance,Seasonality,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,Approved,Present,Native,Rare,Resident,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",Approved,Present,Native,Abundant,,
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Approved,Present,Not Native,Common,,Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Approved,Not Confirmed,Native,,,Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",Approved,Present,Unknown,Common,Breeder,


In [13]:
#drop_list = ["Record Status", "Occurrence", "Nativeness", "Abundance", "Seasonality"]

In [5]:
# Drop the species columns that are not used in the analysis.
# The original in-place drop raised a KeyError when the cell was run a second
# time (the columns were already gone). Returning a new frame and passing
# errors="ignore" makes the cell safe to re-run in any order.
df = df.drop(
    columns=["Record Status", "Occurrence", "Nativeness", "Abundance", "Seasonality"],
    errors="ignore",
)

In [6]:
# Preview the species data after removing the unused columns
df.head(n=5)

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",


In [7]:
# Create the engine for the connection to the SQLite database file
engine = create_engine('sqlite:///national_parks.sqlite')

# Save the DataFrames to the database, replacing any tables that already exist
df1.to_sql(name='parks', con=engine, if_exists='replace', index=False)
df.to_sql(name='species', con=engine, if_exists='replace', index=False)

119248

In [8]:
# Read both tables back from the database
with engine.connect() as connection:
    parks_table = pd.read_sql(sql='SELECT * FROM parks', con=connection)
    species_table = pd.read_sql(sql='SELECT * FROM species', con=connection)

species_table.head(n=5)

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",


In [9]:
# Preview the parks table as read back from the database
parks_table.head(n=5)

Unnamed: 0,Park Code,Park Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [None]:
# Q3: How does the size of different parks compare with the conservation status of the animals in each park across the country?

In [52]:
# Q3 query: for each park, count the species in each at-risk conservation
# status and attach the park's state and acreage, largest parks first.
#
# The IN (...) predicate alone selects the statuses of interest; it also
# excludes NULL and every other status. The former `<> "None"` predicate was
# removed: it was redundant, and its double-quoted "None" is an identifier in
# standard SQL that only works as a string through SQLite's legacy fallback.
#
# Park name and status are added to ORDER BY as tie-breakers so the row order
# is deterministic (rows of one park all share the same Acres value).
query = """
    SELECT
        s."Park Name",
        s."Conservation Status",
        COUNT (s."Conservation Status") AS "Species Count",
        p.State,
        p.Acres
    FROM
        species as s
    JOIN
        parks as p ON s."Park Name" = p."Park Name"
    WHERE
        s."Conservation Status" IN ('Species of Concern', 'Endangered', 'Under Review', 'Threatened')
    GROUP BY
        p."Park Name", s."Conservation Status"
    ORDER BY
        p.Acres DESC, p."Park Name", s."Conservation Status"
    """

q3_df = pd.read_sql(text(query), con=engine)
q3_df

Unnamed: 0,Park Name,Conservation Status,Species Count,State,Acres
0,Wrangell - St Elias National Park and Preserve,Endangered,5,AK,8323148
1,Wrangell - St Elias National Park and Preserve,Species of Concern,40,AK,8323148
2,Wrangell - St Elias National Park and Preserve,Threatened,3,AK,8323148
3,Wrangell - St Elias National Park and Preserve,Under Review,2,AK,8323148
4,Gates Of The Arctic National Park and Preserve,Species of Concern,21,AK,7523898
...,...,...,...,...,...
203,Congaree National Park,Under Review,6,SC,26546
204,Hot Springs National Park,Endangered,4,AR,5550
205,Hot Springs National Park,Species of Concern,61,AR,5550
206,Hot Springs National Park,Threatened,2,AR,5550


In [53]:
# Inspect the column dtypes and non-null counts of the Q3 query result
q3_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 208 entries, 0 to 207
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Park Name            208 non-null    object
 1   Conservation Status  208 non-null    object
 2   Species Count        208 non-null    int64 
 3   State                208 non-null    object
 4   Acres                208 non-null    int64 
dtypes: int64(2), object(3)
memory usage: 8.2+ KB


In [54]:
# Number of result rows (park/status pairs) for each conservation status
q3_df.value_counts(subset="Conservation Status")

Conservation Status
Species of Concern    56
Threatened            53
Endangered            51
Under Review          48
Name: count, dtype: int64

In [57]:
# Keep only the rows whose Conservation Status is one of the statuses of interest
cons_values = ["Species of Concern", "Endangered", "Under Review", "Threatened"]

mask = q3_df["Conservation Status"].isin(cons_values)
df2 = q3_df.loc[mask]
df2.head(n=20)

Unnamed: 0,Park Name,Conservation Status,Species Count,State,Acres
0,Wrangell - St Elias National Park and Preserve,Endangered,5,AK,8323148
1,Wrangell - St Elias National Park and Preserve,Species of Concern,40,AK,8323148
2,Wrangell - St Elias National Park and Preserve,Threatened,3,AK,8323148
3,Wrangell - St Elias National Park and Preserve,Under Review,2,AK,8323148
4,Gates Of The Arctic National Park and Preserve,Species of Concern,21,AK,7523898
5,Death Valley National Park,Endangered,24,"CA, NV",4740912
6,Death Valley National Park,Species of Concern,177,"CA, NV",4740912
7,Death Valley National Park,Threatened,16,"CA, NV",4740912
8,Death Valley National Park,Under Review,27,"CA, NV",4740912
9,Katmai National Park and Preserve,Endangered,6,AK,3674530


In [58]:
# Sum the per-status counts into a single total per park, highest totals first
df3 = (
    df2.groupby("Park Name")["Species Count"]
    .sum()
    .sort_values(ascending=False)
)
df3

Park Name
Death Valley National Park                        244
Redwood National Park                             162
Great Smoky Mountains National Park               140
Channel Islands National Park                     139
Big Bend National Park                            136
Grand Canyon National Park                        132
Hawaii Volcanoes National Park                    115
Joshua Tree National Park                         107
Everglades National Park                          105
Carlsbad Caverns National Park                    102
Great Basin National Park                          99
Zion National Park                                 98
Saguaro National Park                              95
Yosemite National Park                             94
Yellowstone National Park                          91
Shenandoah National Park                           91
Capitol Reef National Park                         91
Guadalupe Mountains National Park                  89
Mesa Verde Nationa