# SQL on Pandas


## References

- https://duckdb.org/docs/stable/guides/python/sql_on_pandas
- https://duckdb.org/docs/stable/guides/python/import_pandas
- https://duckdb.org/docs/stable/guides/python/export_pandas
- https://duckdb.org/docs/stable/guides/python/relational_api_pandas

In [2]:
import duckdb
import pandas

# Create a Pandas dataframe
my_df = pandas.DataFrame.from_dict({"a": [42]})

# query the Pandas DataFrame "my_df"
# Note: duckdb.sql connects to the default in-memory database connection
results = duckdb.sql("SELECT * FROM my_df").df()
results

Unnamed: 0,a
0,42


In [7]:
import duckdb
import pandas

# Create a Pandas dataframe
my_df = pandas.DataFrame.from_dict({"a": [42]})

# create the table "my_table" from the DataFrame "my_df"
# Note: duckdb.sql connects to the default in-memory database connection
duckdb.sql("CREATE TABLE IF NOT EXISTS my_table AS SELECT * FROM my_df")

# insert into the table "my_table" from the DataFrame "my_df"
duckdb.sql("INSERT INTO my_table SELECT * FROM my_df")

duckdb.sql("INSERT INTO my_table BY NAME SELECT * FROM my_df")

# query the table "my_table"
duckdb.sql("SELECT * FROM my_table").show()

┌───────┐
│   a   │
│ int64 │
├───────┤
│    42 │
│    42 │
│    42 │
│    42 │
│    42 │
└───────┘



In [None]:
import duckdb

# https://duckdb.org/docs/stable/guides/python/export_pandas
# read the result of an arbitrary SQL query to a Pandas DataFrame
results = duckdb.sql("SELECT 42").df()
results

Unnamed: 0,42
0,42


In [None]:
import duckdb
import pandas

# connect to an in-memory database
con = duckdb.connect()

input_df = pandas.DataFrame.from_dict(
    {"i": [1, 2, 3, 4], "j": ["one", "two", "three", "four"]}
)

# create a DuckDB relation from a dataframe
rel = con.from_df(input_df)
rel.show()

# chain together relational operators (this is a lazy operation, so the operations are not yet executed)
# equivalent to: SELECT i, j, i*2 AS two_i FROM input_df WHERE i >= 2 ORDER BY i DESC LIMIT 2
transformed_rel = (
    rel.filter("i >= 2").project("i, j, i*2 AS two_i").order("i DESC").limit(2)
)

# trigger execution by requesting .df() of the relation
# .df() could have been added to the end of the chain above - it was separated for clarity
output_df = transformed_rel.df()
output_df

┌───────┬─────────┐
│   i   │    j    │
│ int64 │ varchar │
├───────┼─────────┤
│     1 │ one     │
│     2 │ two     │
│     3 │ three   │
│     4 │ four    │
└───────┴─────────┘



Unnamed: 0,i,j,two_i
0,4,four,8
1,3,three,6
