In [1]:
%pwd

'/Users/ims/MEDS/eds213-data/bren-eds213-data/database'

In [2]:
%ls

Untitle.ipynb    [0m[01;34mexport_adsn[0m/             species_test.csv  tuesday.sql
[01;32mbuild-database[0m*  export_data.sql          sql-review.sql    week4-tue.sql
database.db      import_snow_cover.sql    sql-wrapup.sql
database.db.wal  schema-build-script.sql  thu.sql
database.sqlite  species_count.csv        trigger.sql


In [4]:
%pip install duckdb

Collecting duckdb
  Downloading duckdb-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (966 bytes)
Downloading duckdb-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.2/20.2 MB[0m [31m191.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: duckdb
Successfully installed duckdb-1.2.2
Note: you may need to restart the kernel to use updated packages.


In [5]:
import duckdb

In [7]:
%ls database.db

database.db


In [10]:
conn = duckdb.connect('database.db')

In [11]:
conn

<duckdb.duckdb.DuckDBPyConnection at 0x7f5b19c57030>

Create a cursor to hold the context of executing a query or other SQL statement.

In [12]:
cur = conn.cursor()

In [13]:
cur.execute("SELECT * FROM Site LIMIT 3")

<duckdb.duckdb.DuckDBPyConnection at 0x7f5b300fb6b0>

METHOD 1: getting all at once using fetchall()

In [14]:
cur.fetchall()

[('barr',
  'Barrow',
  'Alaska, USA',
  71.30000305175781,
  -156.60000610351562,
  220.39999389648438),
 ('burn',
  'Burntpoint Creek',
  'Ontario, Canada',
  55.20000076293945,
  -84.30000305175781,
  63.0),
 ('bylo',
  'Bylot Island',
  'Nunavut, Canada',
  73.19999694824219,
  -80.0,
  723.5999755859375)]

Cursors don't save any results; they're just a pass-through mechanism

In [15]:
cur.fetchall()

[]

Cursors always return list of tuples, even 1-tuples

In [18]:
cur.execute("SELECT Code FROM Site LIMIT 3")
cur.fetchall()

[('barr',), ('burn',), ('bylo',)]

Pretty common to use a list comprehension to pull out the values more conveniently

In [19]:
cur.execute("SELECT Code FROM site LIMIT 3")

<duckdb.duckdb.DuckDBPyConnection at 0x7f5b300fb6b0>

In [20]:
[t[0] for t in cur.fetchall()]

['barr', 'burn', 'bylo']

METHOD 2: getting one result (at a time)

In [21]:
cur.execute("SELECT Code FROM site LIMIT 3")
cur.fetchone()

('barr',)

In [22]:
cur.fetchone()

('burn',)

In [23]:
cur.fetchone()

('bylo',)

METHOD 3: iterate over a cursor -- sadly not supported by DuckDB

In [24]:
cur.execute("SELECT Code FROM site LIMIT 3")
for row in cur:
    print(row)

TypeError: 'duckdb.duckdb.DuckDBPyConnection' object is not iterable

Can do things other than SELECTs!

In [None]:
cur.execute("""CREATE )

In [25]:
cur.fetchone()[0]

'barr'

A note on fragility: name the columns being requested

In [27]:
cur.execute("SELECT * FROM Site LIMIT 3")
cur.fetchall()

[('barr',
  'Barrow',
  'Alaska, USA',
  71.30000305175781,
  -156.60000610351562,
  220.39999389648438),
 ('burn',
  'Burntpoint Creek',
  'Ontario, Canada',
  55.20000076293945,
  -84.30000305175781,
  63.0),
 ('bylo',
  'Bylot Island',
  'Nunavut, Canada',
  73.19999694824219,
  -80.0,
  723.5999755859375)]

A better practice: spell it out. More specifically: don't do a SELECT *.

In [28]:
cur.execute("SELECT Code, Latitude, Longitude FROM Site LIMIT 3")
cur.fetchall()

[('barr', 71.30000305175781, -156.60000610351562),
 ('burn', 55.20000076293945, -84.30000305175781),
 ('bylo', 73.19999694824219, -80.0)]

Parameterized queries

In [30]:
cur.execute("SELECT Code FROM site LIMIT 3")
cur.fetchall()

[('barr',), ('burn',), ('bylo',)]

Hypothesize: we want to know the number of nests for each species ( and we're not going to use GROUP BY)

In [31]:
species = "agsq"
query = "SELECT COUNT(*) FROM Bird_nests WHERE species = ?"
cur.execute(query, [species])
cur.fetchall()

[(0,)]

Let's put two things together: query for all species, and loop over those species, getting the number of nests for each species

In [32]:
cur.execute("SELECT Code FROM Species")

# In any other database, could iterate over the cursor
for row in cur.fetchall(): 
    species_code = row[0]
    cur2 = conn.cursor()
    cur2.execute(query, [species_code])
    num_nests = cur2.fetchone()[0]
    print(f"Species {species_code} has {num_nests} nests")
    cur2.close()

Species agsq has 0 nests
Species amcr has 0 nests
Species amgp has 29 nests
Species arfo has 0 nests
Species arte has 0 nests
Species basa has 0 nests
Species bbis has 0 nests
Species bbpl has 43 nests
Species bbsa has 0 nests
Species besw has 0 nests
Species bltu has 0 nests
Species brant has 0 nests
Species brbe has 0 nests
Species brle has 0 nests
Species btcu has 0 nests
Species btgo has 3 nests
Species cole has 0 nests
Species cora has 0 nests
Species cosn has 0 nests
Species crpl has 2 nests
Species cusa has 0 nests
Species dunl has 101 nests
Species eywa has 0 nests
Species glgu has 0 nests
Species goea has 0 nests
Species gwfg has 0 nests
Species gwgu has 0 nests
Species gwte has 0 nests
Species gyrf has 0 nests
Species herg has 3 nests
Species hore has 0 nests
Species hugo has 0 nests
Species kill has 0 nests
Species lalo has 33 nests
Species lbdo has 1 nests
Species lesa has 0 nests
Species leye has 0 nests
Species list has 0 nests
Species ltdu has 0 nests
Species ltja has 0 

There are lots of convenience functions and packages

In [33]:
import pandas as pd

In [34]:
df = pd.read_sql("SELECT * FROM Site", conn)

  df = pd.read_sql("SELECT * FROM Site", conn)


In [35]:
df

Unnamed: 0,Code,Site_name,Location,Latitude,Longitude,Area
0,barr,Barrow,"Alaska, USA",71.300003,-156.600006,220.399994
1,burn,Burntpoint Creek,"Ontario, Canada",55.200001,-84.300003,63.0
2,bylo,Bylot Island,"Nunavut, Canada",73.199997,-80.0,723.599976
3,cakr,Cape Krusenstern,"Alaska, USA",67.099998,-163.5,54.099998
4,cari,Canning River Delta,"Alaska, USA",70.099998,-145.800003,722.0
5,chau,Chaun River Delta,"Chukotka, Russia",68.800003,170.600006,248.199997
6,chur,Churchill,"Manitoba, Canada",58.700001,-93.800003,866.900024
7,coat,Coats Island,"Nunavut, Canada",62.900002,-82.5,1239.099976
8,colv,Colville River Delta,"Alaska, USA",70.400002,-150.699997,324.799988
9,eaba,East Bay,"Nunavut, Canada",64.0,-81.699997,1205.5
