In [2]:
# Data science
import pandas as pd

# API
import requests
import json

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, text, inspect, func

In [3]:
# Read the CSV files
df1 = pd.read_csv('Resources/parks.csv')
df = pd.read_csv('Resources/species.csv')

In [4]:
df1.head()

Unnamed: 0,Park Code,Park Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [5]:
df.head()

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Record Status,Occurrence,Nativeness,Abundance,Seasonality,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,Approved,Present,Native,Rare,Resident,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",Approved,Present,Native,Abundant,,
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Approved,Present,Not Native,Common,,Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Approved,Not Confirmed,Native,,,Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",Approved,Present,Unknown,Common,Breeder,


In [6]:
#drop_list = ["Record Status", "Occurrence", "Nativeness", "Abundance", "Seasonality"]

In [7]:
df.drop(["Record Status", "Occurrence", "Nativeness", "Abundance", "Seasonality"], axis=1, inplace=True)

In [8]:
df.head()

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",


In [9]:
# Create engine for conection to the SQLite
engine = create_engine('sqlite:///national_parks.sqlite')

# Guardar los DataFrames en la base de datos
df1.to_sql('parks', engine, index=False, if_exists='replace')
df.to_sql('species', engine, index=False, if_exists='replace')

119248

In [10]:
# Leer los datos desde la base de datos
with engine.connect() as connection:
    result1 = pd.read_sql('SELECT * FROM parks', connection)
    result2 = pd.read_sql('SELECT * FROM species', connection)

# Mostrar los resultados
#print(result1.head())
result2.head()

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",


In [11]:
result1.head()

Unnamed: 0,Park Code,Park Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08


In [12]:
query = """
    SELECT
        "Park Name",
        "Conservation Status",
        "Scientific Name"
    FROM
        species
    WHERE
        "Conservation Status" = 'Endangered'
    ORDER BY
        "Park Name", "Conservation Status", "Scientific Name";
"""

# Execute the query
table_df = pd.read_sql(text(query), con=engine)
table_df.head(10)

Unnamed: 0,Park Name,Conservation Status,Scientific Name
0,Acadia National Park,Endangered,Canis lupus
1,Acadia National Park,Endangered,Numenius borealis
2,Acadia National Park,Endangered,Salmo salar
3,Arches National Park,Endangered,Empidonax traillii extimus
4,Arches National Park,Endangered,Gymnogyps californianus
5,Badlands National Park,Endangered,Grus americana
6,Badlands National Park,Endangered,Mustela nigripes
7,Big Bend National Park,Endangered,Canis lupus
8,Big Bend National Park,Endangered,Cryptantha crassipes
9,Big Bend National Park,Endangered,Dendroica chrysoparia


In [13]:
df1.to_json('Resources/parks.json', orient='records', lines=True)
df.to_json('Resources/species.json', orient='records', lines=True)

In [34]:
query2 = """
        SELECT
            p."Park Name",
            COUNT(s."Scientific Name") AS species_count,
            s."Category",
            p."State"
        FROM
            Parks p
        JOIN
            Species s
        ON
            p."Park Name" = s."Park Name"
        WHERE
            s."Conservation Status" = 'Endangered'
        GROUP BY
            p."Park Name",
            s."Category"
        ORDER BY
            species_count DESC
        LIMIT 10;
    """

table_df2 = pd.read_sql(text(query2), con=engine)
table_df2

Unnamed: 0,Park Name,species_count,Category,State
0,Hawaii Volcanoes National Park,33,Vascular Plant,HI
1,Haleakala National Park,32,Vascular Plant,HI
2,Channel Islands National Park,12,Vascular Plant,CA
3,Death Valley National Park,10,Vascular Plant,"CA, NV"
4,Death Valley National Park,8,Insect,"CA, NV"
5,Mammoth Cave National Park,8,Invertebrate,KY
6,Glacier Bay National Park and Preserve,7,Mammal,AK
7,Kenai Fjords National Park,7,Mammal,AK
8,Channel Islands National Park,6,Mammal,CA
9,Redwood National Park,6,Mammal,CA


In [None]:
#parks."Park Name",parks.state, COUNT(DISTINCT species.Category) AS NumberOfSpecies FROM species

In [27]:
query2 = """
        SELECT
            p."Park Name",
            COUNT(DISTINCT s.Category) AS category_count
        FROM
            Parks p
        JOIN
            Species s
        ON
            p."Park Name" = s."Park Name"
        WHERE
            s."Conservation Status" = 'Endangered'
        GROUP BY
            p."Park Name"
            s."category"
        ORDER BY
            category_count DESC
        LIMIT 10;
    """

table_df2 = pd.read_sql(text(query2), con=engine)
table_df2

OperationalError: (sqlite3.OperationalError) near "s": syntax error
[SQL: 
        SELECT
            p."Park Name",
            COUNT(DISTINCT s.Category) AS category_count
        FROM
            Parks p
        JOIN
            Species s
        ON
            p."Park Name" = s."Park Name"
        WHERE
            s."Conservation Status" = 'Endangered'
        GROUP BY
            p."Park Name"
            s."category"
        ORDER BY
            category_count DESC
        LIMIT 10;
    ]
(Background on this error at: https://sqlalche.me/e/20/e3q8)