# duckdb

- SQLite 面向 OLTP（联机事务处理），主要用于增删改查
- DuckDB 面向 OLAP（联机分析处理），主要用于数据分析


## References

- https://duckdb.org/docs/stable/clients/python/overview.html

In [2]:
import duckdb

# Run a constant query through the module-level API and print the result table.
answer = duckdb.sql("SELECT 42")
answer.show()

┌───────┐
│  42   │
│ int32 │
├───────┤
│    42 │
└───────┘



In [1]:
import duckdb

# r1 must keep this exact name: the second query refers to it as a table.
r1 = duckdb.sql("SELECT 42 AS i")
doubled = duckdb.sql("SELECT i * 2 AS k FROM r1")
doubled.show()

┌───────┐
│   k   │
│ int32 │
├───────┤
│    84 │
└───────┘



In [1]:
import duckdb
import pandas as pd

# The query names this DataFrame variable in its FROM clause, so the Python
# variable name has to match the SQL text.
pandas_df = pd.DataFrame(data={"a": [42]})
from_pandas = duckdb.sql("SELECT * FROM pandas_df")
# Bare expression so the notebook renders the relation as the cell output.
from_pandas

┌───────┐
│   a   │
│ int64 │
├───────┤
│    42 │
└───────┘

In [1]:
import duckdb
import polars as pl

# The query names this DataFrame variable in its FROM clause, so the Python
# variable name has to match the SQL text.
polars_df = pl.DataFrame(data={"a": [42]})
from_polars = duckdb.sql("SELECT * FROM polars_df")
# Bare expression so the notebook renders the relation as the cell output.
from_polars

┌───────┐
│   a   │
│ int64 │
├───────┤
│    42 │
└───────┘

In [2]:
import duckdb
import pyarrow as pa

# The query names this Arrow table variable in its FROM clause, so the Python
# variable name has to match the SQL text.
column_data = {"a": [42]}
arrow_table = pa.Table.from_pydict(column_data)
from_arrow = duckdb.sql("SELECT * FROM arrow_table")
# Bare expression so the notebook renders the relation as the cell output.
from_arrow

┌───────┐
│   a   │
│ int64 │
├───────┤
│    42 │
└───────┘

In [4]:
import duckdb

# One query text, converted to each result format in turn. Every line builds
# its own relation, so no conversion depends on a previous one.
query = "SELECT 42"

duckdb.sql(query).fetchall()  # Python objects
duckdb.sql(query).df()  # Pandas DataFrame
duckdb.sql(query).pl()  # Polars DataFrame
duckdb.sql(query).arrow()  # Arrow Table
# Last expression: the notebook displays this value as the cell output.
duckdb.sql(query).fetchnumpy()  # NumPy Arrays

{'42': array([42], dtype=int32)}

In [7]:
import os
import duckdb

# Make sure the output directory exists. exist_ok=True replaces the separate
# os.path.exists() check, so there is no gap between checking and creating in
# which another process could create the directory and make makedirs fail.
os.makedirs("tmp", exist_ok=True)

duckdb.sql("SELECT 42").write_parquet("tmp/out.parquet")  # Write to a Parquet file
duckdb.sql("SELECT 42").write_csv("tmp/out.csv")  # Write to a CSV file
# Note: this COPY targets the same path as write_parquet above.
duckdb.sql("COPY (SELECT 42) TO 'tmp/out.parquet'")  # Copy to a Parquet file

In [8]:
import duckdb

# Open a connection with no arguments and query through it instead of through
# the module-level duckdb.sql().
con = duckdb.connect()
labelled = con.sql("SELECT 42 AS x")
labelled.show()

┌───────┐
│   x   │
│ int32 │
├───────┤
│    42 │
└───────┘



In [9]:
import duckdb

# open a connection backed by the database file 'tmp/file.db'
con = duckdb.connect("tmp/file.db")
try:
    # create a table and load data into it
    con.sql("CREATE TABLE test (i INTEGER)")
    con.sql("INSERT INTO test VALUES (42)")
    # query the table
    con.table("test").show()
finally:
    # explicitly close the connection, even if a statement above raised
    # (for example CREATE TABLE when the cell is re-run against a file that
    # already contains the table), so the database file is not left open
    con.close()
# Note: connections also closed implicitly when they go out of scope

┌───────┐
│   i   │
│ int32 │
├───────┤
│    42 │
└───────┘



In [11]:
import duckdb

# Statements that create and fill the demo table, run in order.
setup_statements = (
    "CREATE TABLE test (i INTEGER)",
    "INSERT INTO test VALUES (42)",
)

with duckdb.connect("tmp/file2.db") as con:
    for statement in setup_statements:
        con.sql(statement)
    con.table("test").show()
    # the context manager closes the connection automatically

┌───────┐
│   i   │
│ int32 │
├───────┤
│    42 │
└───────┘

