In [1]:
# Data science
import pandas as pd

# API
import requests
import json

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, text, inspect, func

In [2]:
# Load the two source datasets from the local Resources folder:
# `df` receives the species records and `df1` the park records.
df = pd.read_csv('Resources/species.csv')
df1 = pd.read_csv('Resources/parks.csv')

In [3]:
# Preview the first five rows of the parks data.
df1.head(n=5)

Unnamed: 0,Park Code,Park Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [4]:
# Preview the first five rows of the species data.
df.head(n=5)

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Record Status,Occurrence,Nativeness,Abundance,Seasonality,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,Approved,Present,Native,Rare,Resident,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",Approved,Present,Native,Abundant,,
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Approved,Present,Not Native,Common,,Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Approved,Not Confirmed,Native,,,Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",Approved,Present,Unknown,Common,Breeder,


In [13]:
# NOTE: unused leftover - the drop cell below passes this same list inline.
# drop_list = ["Record Status", "Occurrence", "Nativeness", "Abundance", "Seasonality"]

In [5]:
# Remove the species columns that the later query does not select.
# The result is assigned back instead of using inplace=True, and
# errors="ignore" skips columns that are already gone, so this cell can be
# re-run without raising KeyError.
# NOTE(review): errors="ignore" also hides a misspelled or renamed column;
# the df.head() preview in the next cell is the check for that.
df = df.drop(
    columns=["Record Status", "Occurrence", "Nativeness", "Abundance", "Seasonality"],
    errors="ignore",
)

In [6]:
# Preview the species data again after the column drop.
df.head(n=5)

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",


In [7]:
# Create the SQLAlchemy engine for the local SQLite database file.
engine = create_engine('sqlite:///national_parks.sqlite')

# Save both DataFrames as database tables. An existing table with the same
# name is replaced, and the DataFrame index is not written as a column.
df1.to_sql(name='parks', con=engine, if_exists='replace', index=False)
df.to_sql(name='species', con=engine, if_exists='replace', index=False)

119248

In [8]:
# Read both tables back from the database into DataFrames. The connection is
# closed automatically when the `with` block exits.
with engine.connect() as conn:
    parks_table = pd.read_sql(sql='SELECT * FROM parks', con=conn)
    species_table = pd.read_sql(sql='SELECT * FROM species', con=conn)

# Preview the species table as loaded from SQLite.
species_table.head(n=5)

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",


In [9]:
# Preview the parks table as loaded from SQLite.
parks_table.head(n=5)

Unnamed: 0,Park Code,Park Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [None]:
# Q3: How does the size of different parks compare with the conservation status of the animals in each park across the country?

In [18]:
# Q3 input: one row per species that has a conservation status, joined to the
# state and acreage of its park (matched on "Park Name").
#
# The status filter is written explicitly:
#   * IS NOT NULL excludes species whose status is missing. The original
#     `<> "None"` only excluded them as a side effect of NULL comparison
#     semantics.
#   * 'None' is single-quoted so it is a SQL string literal. A double-quoted
#     "None" is an identifier in SQL; SQLite only treats it as a string as a
#     legacy fallback when no such column exists.
query = """
    SELECT
        s."Park Name",
        s."Scientific Name",
        s.Category,
        s."Conservation Status",
        p.State,
        p.Acres
    FROM
        species as s
    JOIN
        parks as p ON s."Park Name" = p."Park Name"
    WHERE
        s."Conservation Status" IS NOT NULL
        AND s."Conservation Status" <> 'None'
    ORDER BY
        p."Park Name"
    """

q3_df = pd.read_sql(text(query), con=engine)
q3_df

Unnamed: 0,Park Name,Scientific Name,Category,Conservation Status,State,Acres
0,Acadia National Park,Canis latrans,Mammal,Species of Concern,ME,47390
1,Acadia National Park,Canis lupus,Mammal,Endangered,ME,47390
2,Acadia National Park,Eptesicus fuscus,Mammal,Species of Concern,ME,47390
3,Acadia National Park,Lasionycteris noctivagans,Mammal,Species of Concern,ME,47390
4,Acadia National Park,Myotis keenii,Mammal,Species of Concern,ME,47390
...,...,...,...,...,...,...
4713,Zion National Park,Pediomelum epipsilum,Vascular Plant,Species of Concern,UT,146598
4714,Zion National Park,Camissonia bairdii,Vascular Plant,Species of Concern,UT,146598
4715,Zion National Park,Epilobium nevadense,Vascular Plant,Species of Concern,UT,146598
4716,Zion National Park,Carex haysii,Vascular Plant,Species of Concern,UT,146598


In [19]:
# Print the column dtypes and non-null counts of the Q3 result.
q3_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4718 entries, 0 to 4717
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Park Name            4718 non-null   object
 1   Scientific Name      4718 non-null   object
 2   Category             4718 non-null   object
 3   Conservation Status  4718 non-null   object
 4   State                4718 non-null   object
 5   Acres                4718 non-null   int64 
dtypes: int64(1), object(5)
memory usage: 221.3+ KB


In [20]:
# Count how many joined rows fall under each conservation status.
# NOTE(review): the counts include 'Breeder', 'Resident' and 'Migratory',
# which also appear as Seasonality values in the raw species preview.
# Presumably some source rows are misaligned - confirm before interpreting.
q3_df.value_counts(subset="Conservation Status")

Conservation Status
Species of Concern     3843
Endangered              374
Under Review            194
Threatened              184
In Recovery              77
Proposed Endangered      24
Breeder                   7
Proposed Threatened       7
Resident                  5
Migratory                 2
Extinct                   1
Name: count, dtype: int64