In [49]:
import duckdb
import pandas as pd
import polars as pl

In [2]:
# An explicit connection gives this cell its own private database instead of
# the module-level default connection; with no path argument the database
# lives in memory only. The context manager closes the connection even if
# the query raises, so no manual close() is needed.
with duckdb.connect() as con:
    con.sql("SELECT 42 as X").show()

┌───────┐
│   X   │
│ int32 │
├───────┤
│    42 │
└───────┘



In [4]:
# Open (or create) a persistent database file; the connection is closed
# automatically when the `with` block exits.
with duckdb.connect("../duckdbdata/file.db") as con:
    # CREATE OR REPLACE keeps the cell re-runnable: the file outlives the
    # kernel, so a plain CREATE TABLE fails the second time it is executed.
    con.sql("CREATE OR REPLACE TABLE test (i INTEGER)")
    con.sql("INSERT INTO test VALUES (42)")
    con.table("test").show()

┌───────┐
│   i   │
│ int32 │
├───────┤
│    42 │
└───────┘



In [5]:
# Long-lived connection to the persistent database used by the cells that follow.
con = duckdb.connect(database="../duckdbdata/chicago.db")

In [7]:
# Read the parking-ticket extract into a pandas DataFrame.
df1 = pd.read_parquet(path="../duckdbdata/ChicagoParkingTickets.parquet")

In [9]:
# Materialise the pandas frame `df1` as a DuckDB table (DuckDB resolves the
# name `df1` from the Python variables in scope).
# CREATE OR REPLACE keeps the cell re-runnable against the persistent file:
# a plain CREATE TABLE fails once the table exists from an earlier run.
con.execute("CREATE OR REPLACE TABLE ChicagoParkingTickets AS SELECT * FROM df1")

<duckdb.duckdb.DuckDBPyConnection at 0x1f0a4406870>

In [10]:
# Print a preview of the ticket table.
tickets_rel = con.table("ChicagoParkingTickets")
tickets_rel.show()

┌───────────────┬─────────────────────┬────────────────┬────────────────────┬────────────────────┬────────────────┬───────────────────┬────────────────────┬─────────────────────────┬──────────────────────────────────┬──────────────┬───────┬──────────┬───────┬─────────────────┬────────────┬─────────────────────┬─────────┬──────────────┬────────────┬──────────────────────┐
│ Ticket_number │     Issued_date     │ Community_Name │       Sector       │        Side        │ Hardship_Index │ Per_capita_income │ Percent_unemployed │ Percent_without_diploma │ Percent_households_below_poverty │ Neighborhood │ Ward  │  Tract   │  ZIP  │ Police_District │ Plate_Type │ License_Plate_State │ Unit_ID │ Violation_ID │ Officer_ID │ PaymentIsOutstanding │
│     int64     │       varchar       │    varchar     │      varchar       │      varchar       │     double     │      double       │       double       │         double          │              double              │   varchar    │ int64 │  double  │ 

In [11]:
# Column names and types of the ticket table.
query = "DESCRIBE ChicagoParkingTickets"
con.sql(query)

┌──────────────────────────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
│           column_name            │ column_type │  null   │   key   │ default │  extra  │
│             varchar              │   varchar   │ varchar │ varchar │ varchar │ varchar │
├──────────────────────────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
│ Ticket_number                    │ BIGINT      │ YES     │ NULL    │ NULL    │ NULL    │
│ Issued_date                      │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ Community_Name                   │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ Sector                           │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ Side                             │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ Hardship_Index                   │ DOUBLE      │ YES     │ NULL    │ NULL    │ NULL    │
│ Per_capita_income                │ DOUBLE      │ YES     │ NULL    │ NULL    │ NULL    │

In [None]:
# Per-column profile (min, max, approx_unique, quartiles, null percentage, ...).
query = "SUMMARIZE ChicagoParkingTickets"
con.sql(query)

┌──────────────────────────────────┬─────────────┬─────────────────────────┬─────────────────────┬───────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬──────────┬─────────────────┐
│           column_name            │ column_type │           min           │         max         │ approx_unique │        avg         │        std         │        q25         │        q50         │        q75         │  count   │ null_percentage │
│             varchar              │   varchar   │         varchar         │       varchar       │     int64     │      varchar       │      varchar       │      varchar       │      varchar       │      varchar       │  int64   │  decimal(9,2)   │
├──────────────────────────────────┼─────────────┼─────────────────────────┼─────────────────────┼───────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┼──────────┼─────────────────┤
│ Ti

In [15]:
# Persist the SUMMARIZE output as a table so it can be queried like any other.
# CREATE OR REPLACE keeps the cell re-runnable: the database file is
# persistent, so a plain CREATE TABLE fails once cpt_summary already exists.
con.sql("CREATE OR REPLACE TABLE cpt_summary AS SELECT * FROM (SUMMARIZE ChicagoParkingTickets)")

In [16]:
# Total number of rows in the ticket table.
query = "SELECT COUNT(*) FROM ChicagoParkingTickets"
con.sql(query)

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│     49950127 │
└──────────────┘

In [17]:
# Read back the stored summary table.
query = "SELECT * FROM cpt_summary"
con.sql(query)

┌──────────────────────────────────┬─────────────┬─────────────────────────┬─────────────────────┬───────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬──────────┬─────────────────┐
│           column_name            │ column_type │           min           │         max         │ approx_unique │        avg         │        std         │        q25         │        q50         │        q75         │  count   │ null_percentage │
│             varchar              │   varchar   │         varchar         │       varchar       │     int64     │      varchar       │      varchar       │      varchar       │      varchar       │      varchar       │  int64   │  decimal(9,2)   │
├──────────────────────────────────┼─────────────┼─────────────────────────┼─────────────────────┼───────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┼──────────┼─────────────────┤
│ Ti

In [18]:
# Sum of PaymentIsOutstanding per police district, with plates bucketed into
# 'In-State' (IL) versus 'Out-of-State' (everything else); largest totals first.
query = """
    SELECT
        Police_District,
        CASE WHEN License_Plate_State = 'IL' THEN 'In-State' ELSE 'Out-of-State' END AS License_Plate_State,
        SUM(PaymentIsOutstanding) AS TicketsOutstanding
    FROM ChicagoParkingTickets
    GROUP BY
            Police_District,
            CASE WHEN License_Plate_State = 'IL' THEN 'In-State' ELSE 'Out-of-State' END
    ORDER BY SUM(PaymentIsOutstanding) DESC
"""
con.sql(query)

┌─────────────────┬─────────────────────┬────────────────────┐
│ Police_District │ License_Plate_State │ TicketsOutstanding │
│     double      │       varchar       │       int128       │
├─────────────────┼─────────────────────┼────────────────────┤
│             1.0 │ In-State            │            1073606 │
│            10.0 │ In-State            │            1018027 │
│            12.0 │ In-State            │             863646 │
│            18.0 │ In-State            │             836144 │
│             9.0 │ In-State            │             730582 │
│             8.0 │ In-State            │             717649 │
│            19.0 │ In-State            │             702865 │
│            25.0 │ In-State            │             535055 │
│            14.0 │ In-State            │             520815 │
│            15.0 │ In-State            │             509890 │
│              ·  │    ·                │                ·   │
│              ·  │    ·                │              

In [19]:
# Wrap the district / plate-bucket aggregate in a CTE, then PIVOT it so each
# License_Plate_State bucket becomes its own column, one row per district.
query = """
    WITH records AS (
    SELECT
        Police_District,
        CASE WHEN License_Plate_State = 'IL' THEN 'In-State' ELSE 'Out-of-State' END AS License_Plate_State,
        SUM(PaymentIsOutstanding) AS TicketsOutstanding
    FROM ChicagoParkingTickets
    GROUP BY
            Police_District,
            CASE WHEN License_Plate_State = 'IL' THEN 'In-State' ELSE 'Out-of-State' END
    )
    PIVOT records
    ON License_Plate_State
    USING SUM(TicketsOutstanding)
    ORDER BY Police_District
"""
con.sql(query)

┌─────────────────┬──────────┬──────────────┐
│ Police_District │ In-State │ Out-of-State │
│     double      │  int128  │    int128    │
├─────────────────┼──────────┼──────────────┤
│             1.0 │  1073606 │       227079 │
│             2.0 │   477357 │        72515 │
│             3.0 │   418782 │        40902 │
│             4.0 │   388914 │        37860 │
│             5.0 │   250010 │        18587 │
│             6.0 │   427142 │        35541 │
│             7.0 │   432358 │        25163 │
│             8.0 │   717649 │        67161 │
│             9.0 │   730582 │        82071 │
│            10.0 │  1018027 │        78570 │
│            11.0 │   482732 │        26311 │
│            12.0 │   863646 │       148139 │
│            14.0 │   520815 │        85161 │
│            15.0 │   509890 │        27189 │
│            16.0 │   244237 │        19435 │
│            17.0 │   393459 │        39429 │
│            18.0 │   836144 │       280370 │
│            19.0 │   702865 │    

In [20]:
# Pivot the other way: the listed police districts (1.0 to 4.0) become the
# columns and each plate bucket becomes a row.
query = """
    WITH records AS (
    SELECT
        Police_District,
        CASE WHEN License_Plate_State = 'IL' THEN 'In-State' ELSE 'Out-of-State' END AS License_Plate_State,
        SUM(PaymentIsOutstanding) AS TicketsOutstanding
    FROM ChicagoParkingTickets
    GROUP BY
            Police_District,
            CASE WHEN License_Plate_State = 'IL' THEN 'In-State' ELSE 'Out-of-State' END
    )
    PIVOT records
    ON Police_District IN (1.0, 2.0, 3.0, 4.0)
    USING SUM(TicketsOutstanding)
"""
con.sql(query)

┌─────────────────────┬─────────┬────────┬────────┬────────┐
│ License_Plate_State │   1.0   │  2.0   │  3.0   │  4.0   │
│       varchar       │ int128  │ int128 │ int128 │ int128 │
├─────────────────────┼─────────┼────────┼────────┼────────┤
│ In-State            │ 1073606 │ 477357 │ 418782 │ 388914 │
│ Out-of-State        │  227079 │  72515 │  40902 │  37860 │
└─────────────────────┴─────────┴────────┴────────┴────────┘

In [21]:
# PIVOT straight from the un-aggregated rows: the USING aggregate does the
# summing, and every column not named in ON / USING (here the plate state)
# becomes a row key.
query = """    
    WITH records AS (
        SELECT
            Police_District,
            License_Plate_State,
            PaymentIsOutstanding
        FROM ChicagoParkingTickets
    )
    PIVOT records
    ON Police_District IN (1.0, 2.0, 3.0, 4.0)
    USING SUM(PaymentIsOutstanding)
    ORDER BY License_Plate_State
"""
pivoted_rel = con.sql(query)
pivoted_rel.show(max_rows=100)

┌─────────────────────┬─────────┬────────┬────────┬────────┐
│ License_Plate_State │   1.0   │  2.0   │  3.0   │  4.0   │
│       varchar       │ int128  │ int128 │ int128 │ int128 │
├─────────────────────┼─────────┼────────┼────────┼────────┤
│ AB                  │     156 │     79 │     27 │     13 │
│ AK                  │     317 │    128 │     74 │     35 │
│ AL                  │    1634 │    706 │    513 │    366 │
│ AR                  │    1197 │    447 │    335 │    181 │
│ AZ                  │    3248 │    974 │    621 │    341 │
│ BC                  │     260 │     38 │      3 │      3 │
│ CA                  │    9258 │   4620 │   1041 │    605 │
│ CO                  │    2340 │    731 │    216 │     80 │
│ CT                  │    1066 │    409 │     67 │     42 │
│ DC                  │     192 │    135 │     17 │     11 │
│ DE                  │     185 │    123 │     26 │     10 │
│ FL                  │    6504 │   1671 │    680 │    352 │
│ GA                  │ 

In [22]:
# PIVOT with two aggregates: every pivoted district yields one column per
# aggregate (sum of outstanding payments and the maximum Officer_ID).
query = """    
    WITH records AS (
        SELECT
            Police_District,
            Officer_ID,
            License_Plate_State,
            PaymentIsOutstanding
        FROM ChicagoParkingTickets
    )
    PIVOT records
    ON Police_District IN (1.0, 2.0, 3.0, 4.0)
    USING SUM(PaymentIsOutstanding), MAX(Officer_ID)
    ORDER BY License_Plate_State
"""
pivoted_rel = con.sql(query)
pivoted_rel.show(max_rows=100)

┌─────────────────────┬───────────────────────────────┬─────────────────────┬───────────────────────────────┬─────────────────────┬───────────────────────────────┬─────────────────────┬───────────────────────────────┬─────────────────────┐
│ License_Plate_State │ 1.0_sum(PaymentIsOutstanding) │ 1.0_max(Officer_ID) │ 2.0_sum(PaymentIsOutstanding) │ 2.0_max(Officer_ID) │ 3.0_sum(PaymentIsOutstanding) │ 3.0_max(Officer_ID) │ 4.0_sum(PaymentIsOutstanding) │ 4.0_max(Officer_ID) │
│       varchar       │            int128             │        int64        │            int128             │        int64        │            int128             │        int64        │            int128             │        int64        │
├─────────────────────┼───────────────────────────────┼─────────────────────┼───────────────────────────────┼─────────────────────┼───────────────────────────────┼─────────────────────┼───────────────────────────────┼─────────────────────┤
│ AB                  │                 

In [23]:
# Round trip: PIVOT the districts into columns, then UNPIVOT every column
# except License_Plate_State back into (Police_District, PaymentIsOutstanding)
# name/value pairs.
query = """    
    WITH records AS (
        SELECT
            Police_District,
            License_Plate_State,
            PaymentIsOutstanding
        FROM ChicagoParkingTickets
    ),
    pivoted AS (
        PIVOT records
        ON Police_District IN (1.0, 2.0, 3.0, 4.0)
        USING SUM(PaymentIsOutstanding)
    )
    UNPIVOT pivoted
    ON COLUMNS(* EXCLUDE(License_Plate_State))
    INTO
        NAME Police_District
        VALUE PaymentIsOutstanding
    ORDER BY Police_District ASC
"""
unpivoted_rel = con.sql(query)
unpivoted_rel.show(max_rows=100)

┌─────────────────────┬─────────────────┬──────────────────────┐
│ License_Plate_State │ Police_District │ PaymentIsOutstanding │
│       varchar       │     varchar     │        int128        │
├─────────────────────┼─────────────────┼──────────────────────┤
│ WA                  │ 1.0             │                 1578 │
│ KY                  │ 1.0             │                 2417 │
│ AB                  │ 1.0             │                  156 │
│ MX                  │ 1.0             │                  102 │
│ PQ                  │ 1.0             │                   13 │
│ ID                  │ 1.0             │                  207 │
│ OK                  │ 1.0             │                 1762 │
│ VI                  │ 1.0             │                  128 │
│ AR                  │ 1.0             │                 1197 │
│ KS                  │ 1.0             │                 1911 │
│ ND                  │ 1.0             │                  251 │
│ NJ                  │ 1

In [24]:
# A SELECT-list alias (here `pd`) can be referenced in the WHERE / GROUP BY /
# HAVING clauses. Aliases CANNOT be used in a JOIN ... ON clause.
query = """
    SELECT
        Police_District AS pd,
        CASE WHEN License_Plate_State = 'IL' THEN 'In-State' ELSE 'Out-of-State' END AS License_Plate_State,
        SUM(PaymentIsOutstanding) AS TicketsOutstanding
    FROM ChicagoParkingTickets
    WHERE
        pd IN (1.0, 2.0, 3.0, 4.0)
    GROUP BY
        Police_District,
        CASE WHEN License_Plate_State = 'IL' THEN 'In-State' ELSE 'Out-of-State' END
    ORDER BY SUM(PaymentIsOutstanding) DESC
"""
con.sql(query)

┌────────┬─────────────────────┬────────────────────┐
│   pd   │ License_Plate_State │ TicketsOutstanding │
│ double │       varchar       │       int128       │
├────────┼─────────────────────┼────────────────────┤
│    1.0 │ In-State            │            1073606 │
│    2.0 │ In-State            │             477357 │
│    3.0 │ In-State            │             418782 │
│    4.0 │ In-State            │             388914 │
│    1.0 │ Out-of-State        │             227079 │
│    2.0 │ Out-of-State        │              72515 │
│    3.0 │ Out-of-State        │              40902 │
│    4.0 │ Out-of-State        │              37860 │
└────────┴─────────────────────┴────────────────────┘

In [25]:
# COLUMNS() with a lambda selects every column whose name matches the
# predicate (here: names starting with 'Per').
# NOTE: COLUMNS() cannot be used in a GROUP BY clause.
query = """
    SELECT
        Police_District AS pd,
        COLUMNS(col -> col LIKE 'Per%')
    FROM ChicagoParkingTickets
    WHERE
        pd IN (1.0, 2.0, 3.0, 4.0)
    LIMIT 10
"""
con.sql(query)

┌────────┬───────────────────┬────────────────────┬─────────────────────────┬──────────────────────────────────┐
│   pd   │ Per_capita_income │ Percent_unemployed │ Percent_without_diploma │ Percent_households_below_poverty │
│ double │      double       │       double       │         double          │              double              │
├────────┼───────────────────┼────────────────────┼─────────────────────────┼──────────────────────────────────┤
│    1.0 │           65526.0 │                5.7 │                     3.1 │                             14.7 │
│    1.0 │           65526.0 │                5.7 │                     3.1 │                             14.7 │
│    1.0 │           65526.0 │                5.7 │                     3.1 │                             14.7 │
│    1.0 │           59077.0 │                4.9 │                     7.4 │                             13.8 │
│    1.0 │           65526.0 │                5.7 │                     3.1 │                   

In [27]:
# Scalar function chaining (dot syntax) and a trailing comma in the SELECT list.
# trim() runs first: once replace() has turned every space into an underscore
# there is no whitespace left for a trailing trim() to strip, so trimming last
# would leave leading/trailing padding behind as underscores.
con.sql("""
    SELECT
        Sector,
        Sector.trim().upper().replace(' ', '_') AS Sector_upper,
    FROM ChicagoParkingTickets
    WHERE
        Police_District IN (1.0, 2.0, 3.0, 4.0)
    LIMIT 10
""")

┌─────────────────┬─────────────────┐
│     Sector      │  Sector_upper   │
│     varchar     │     varchar     │
├─────────────────┼─────────────────┤
│ Loop            │ LOOP            │
│ Loop            │ LOOP            │
│ Loop            │ LOOP            │
│ Near South Side │ NEAR_SOUTH_SIDE │
│ Loop            │ LOOP            │
│ Loop            │ LOOP            │
│ Loop            │ LOOP            │
│ Loop            │ LOOP            │
│ Loop            │ LOOP            │
│ Loop            │ LOOP            │
├─────────────────┴─────────────────┤
│ 10 rows                 2 columns │
└───────────────────────────────────┘

In [None]:
# Build a small lookup table that maps each distinct plate state found in the
# ticket data to a coarse region bucket (In-State / Neighbors / Out-Of-Country
# / In-Country). The table is recreated on every run.
query = """
    CREATE OR REPLACE TABLE LicensePlateState
    (
        State_Abbreviation VARCHAR,
        Region VARCHAR
    );
        
    INSERT INTO LicensePlateState
    SELECT DISTINCT
        License_Plate_State,
        CASE WHEN License_Plate_State IN ('IL') THEN 'In-State'
             WHEN License_Plate_State IN ('IN', 'WI', 'MI', 'MO', 'IA', 'KY') THEN 'Neighbors'
             WHEN License_Plate_State IN ('AB', 'BC', 'GU', 'MB', 'MX', 'NB', 'NF', 'NS', 'ON', 'PE', 'PQ', 'PR', 'QU', 'XX', 'YT', 'ZZ') THEN 'Out-Of-Country'
             ELSE 'In-Country'
        END AS Region
    FROM ChicagoParkingTickets;
"""
con.sql(query)

In [None]:
# Extract the distinct community-level attributes from the ticket table into
# their own table. The table is recreated on every run.
query = """
    CREATE OR REPLACE TABLE CommunityDetails
    (
        Community_Name VARCHAR,
        Sector VARCHAR,
        Side VARCHAR,
        Hardship_Index DOUBLE,
        Per_capita_income DOUBLE,
        Percent_unemployed DOUBLE,
        Percent_without_diploma DOUBLE,
        Percent_households_below_poverty DOUBLE,
    );
        
    INSERT INTO CommunityDetails
    SELECT DISTINCT
        Community_Name,
        Sector,
        Side,
        Hardship_Index,
        Per_capita_income,
        Percent_unemployed,
        Percent_without_diploma,
        Percent_households_below_poverty
    FROM ChicagoParkingTickets;
"""
con.sql(query)

In [36]:
# Ticket count, outstanding count and outstanding percentage per region,
# obtained by joining the tickets to the LicensePlateState lookup table.
query = """
    SELECT
        lps.Region,
        COUNT(*) AS NumberOfTickets,
        SUM(cpt.PaymentIsOutstanding) AS TotalOutstanding,
        CAST(100.0 * SUM(cpt.PaymentIsOutstanding) / COUNT(*) AS DECIMAL(5,2)) AS PctOutstanding
    FROM ChicagoParkingTickets AS cpt
        INNER JOIN LicensePlateState AS lps
            ON cpt.License_Plate_State = lps.State_Abbreviation
    GROUP BY
        lps.Region
    ORDER BY
        lps.Region;
"""
region_rel = con.sql(query)
region_rel.show(max_rows=100)

┌────────────────┬─────────────────┬──────────────────┬────────────────┐
│     Region     │ NumberOfTickets │ TotalOutstanding │ PctOutstanding │
│    varchar     │      int64      │      int128      │  decimal(5,2)  │
├────────────────┼─────────────────┼──────────────────┼────────────────┤
│ In-Country     │         1799407 │           784477 │          43.60 │
│ In-State       │        45712525 │         12079718 │          26.43 │
│ Neighbors      │         2395748 │          1007234 │          42.04 │
│ Out-Of-Country │           40389 │            30482 │          75.47 │
└────────────────┴─────────────────┴──────────────────┴────────────────┘



In [47]:
# FROM-first syntax: with no SELECT clause the query returns every column.
query = """
    FROM LicensePlateState AS lps
    ORDER BY
        lps.State_Abbreviation;
"""
lookup_rel = con.sql(query)
lookup_rel.show(max_rows=100)

┌────────────────────┬────────────────┐
│ State_Abbreviation │     Region     │
│      varchar       │    varchar     │
├────────────────────┼────────────────┤
│ AB                 │ Out-Of-Country │
│ AK                 │ In-Country     │
│ AL                 │ In-Country     │
│ AR                 │ In-Country     │
│ AZ                 │ In-Country     │
│ BC                 │ Out-Of-Country │
│ CA                 │ In-Country     │
│ CO                 │ In-Country     │
│ CT                 │ In-Country     │
│ DC                 │ In-Country     │
│ DE                 │ In-Country     │
│ FL                 │ In-Country     │
│ GA                 │ In-Country     │
│ GU                 │ Out-Of-Country │
│ HI                 │ In-Country     │
│ IA                 │ Neighbors      │
│ ID                 │ In-Country     │
│ IL                 │ In-State       │
│ IN                 │ Neighbors      │
│ KS                 │ In-Country     │
│ KY                 │ Neighbors      │


In [44]:
# The per-region summary written FROM-first: the FROM / JOIN clause precedes
# the SELECT list.
query = """
    FROM ChicagoParkingTickets AS cpt
        INNER JOIN LicensePlateState AS lps
            ON cpt.License_Plate_State = lps.State_Abbreviation
    SELECT
        lps.Region,
        COUNT(*) AS NumberOfTickets,
        SUM(cpt.PaymentIsOutstanding) AS TotalOutstanding,
        CAST(100.0 * SUM(cpt.PaymentIsOutstanding) / COUNT(*) AS DECIMAL(5,2)) AS PctOutstanding
    GROUP BY
        lps.Region
    ORDER BY
        lps.Region;
"""
region_rel = con.sql(query)
region_rel.show(max_rows=100)

┌────────────────┬─────────────────┬──────────────────┬────────────────┐
│     Region     │ NumberOfTickets │ TotalOutstanding │ PctOutstanding │
│    varchar     │      int64      │      int128      │  decimal(5,2)  │
├────────────────┼─────────────────┼──────────────────┼────────────────┤
│ In-Country     │         1799407 │           784477 │          43.60 │
│ In-State       │        45712525 │         12079718 │          26.43 │
│ Neighbors      │         2395748 │          1007234 │          42.04 │
│ Out-Of-Country │           40389 │            30482 │          75.47 │
└────────────────┴─────────────────┴──────────────────┴────────────────┘



In [None]:
# NOTE(review): this repeats the SELECT-first per-region summary that already
# appears earlier in the notebook; consider removing one of the two copies.
query = """
    SELECT
        lps.Region,
        COUNT(*) AS NumberOfTickets,
        SUM(cpt.PaymentIsOutstanding) AS TotalOutstanding,
        CAST(100.0 * SUM(cpt.PaymentIsOutstanding) / COUNT(*) AS DECIMAL(5,2)) AS PctOutstanding
    FROM ChicagoParkingTickets AS cpt
        INNER JOIN LicensePlateState AS lps
            ON cpt.License_Plate_State = lps.State_Abbreviation
    GROUP BY
        lps.Region
    ORDER BY
        lps.Region;
"""
region_rel = con.sql(query)
region_rel.show(max_rows=100)

In [39]:
# LATERAL join: the subquery on the right can reference `i` from the table
# function on its left. There is no ORDER BY, so row order is not guaranteed.
query = """
    SELECT *
    FROM
        range(5) t(i),
        LATERAL (SELECT i + 1) t2(j);
"""
con.sql(query)

┌───────┬───────┐
│   i   │   j   │
│ int64 │ int64 │
├───────┼───────┤
│     0 │     1 │
│     2 │     3 │
│     1 │     2 │
│     4 │     5 │
│     3 │     4 │
└───────┴───────┘

In [40]:
# A LATERAL subquery may return several rows per input row (two here, via
# UNION ALL); ORDER BY ALL sorts by every output column.
query = """
    SELECT *
    FROM
        generate_series(0, 2) t(i),
        LATERAL (SELECT i + 10 UNION ALL SELECT i + 100) t2(j)
    ORDER BY ALL;
"""
con.sql(query)

┌───────┬───────┐
│   i   │   j   │
│ int64 │ int64 │
├───────┼───────┤
│     0 │    10 │
│     0 │   100 │
│     1 │    11 │
│     1 │   101 │
│     2 │    12 │
│     2 │   102 │
└───────┴───────┘

In [43]:
# POSITIONAL JOIN pairs rows by position rather than by key; the shorter
# table (t2, two rows) is padded with NULL against t1's three rows.
query = """
    CREATE OR REPLACE TABLE t1 (x INTEGER);
    CREATE OR REPLACE TABLE t2 (s VARCHAR);

    INSERT INTO t1 VALUES (1), (2), (3);
    INSERT INTO t2 VALUES ('a'), ('b');

    SELECT *
    FROM t1
    POSITIONAL JOIN t2;
"""
con.sql(query)

┌───────┬─────────┐
│   x   │    s    │
│ int32 │ varchar │
├───────┼─────────┤
│     1 │ a       │
│     2 │ b       │
│     3 │ NULL    │
└───────┴─────────┘

In [50]:
# Build a small polars DataFrame and query it through DuckDB's default
# connection, which finds it by its Python variable name `df`.
columns = {
    "A": [1, 2, 3, 4, 5],
    "fruits": ["banana", "banana", "apple", "apple", "banana"],
    "B": [5, 4, 3, 2, 1],
    "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
}
df = pl.DataFrame(columns)
frame_rel = duckdb.sql("SELECT * FROM df")
frame_rel.show()

┌───────┬─────────┬───────┬─────────┐
│   A   │ fruits  │   B   │  cars   │
│ int64 │ varchar │ int64 │ varchar │
├───────┼─────────┼───────┼─────────┤
│     1 │ banana  │     5 │ beetle  │
│     2 │ banana  │     4 │ audi    │
│     3 │ apple   │     3 │ beetle  │
│     4 │ apple   │     2 │ beetle  │
│     5 │ banana  │     1 │ beetle  │
└───────┴─────────┴───────┴─────────┘



In [51]:
# Opposite direction: run a query on the default connection and convert the
# result to a polars DataFrame with .pl().
query = """
    SELECT 1 AS id, 'banana' AS fruit
    UNION ALL
    SELECT 2, 'apple'
    UNION ALL
    SELECT 3, 'mango'"""
fruit_rel = duckdb.sql(query)
df = fruit_rel.pl()
print(df)

shape: (3, 2)
┌─────┬────────┐
│ id  ┆ fruit  │
│ --- ┆ ---    │
│ i32 ┆ str    │
╞═════╪════════╡
│ 1   ┆ banana │
│ 2   ┆ apple  │
│ 3   ┆ mango  │
└─────┴────────┘


In [54]:
# Show the Python type of `df` (expected: a polars DataFrame).
type(df)

polars.dataframe.frame.DataFrame

In [55]:
# Scratch copy of the ticket data (dropped again by a later cell).
# CREATE OR REPLACE keeps this cell re-runnable even when a previous run
# created the table but never reached the DROP.
con.execute("CREATE OR REPLACE TABLE ChicagoParkingTickets_DELETEME AS SELECT * FROM df1")

<duckdb.duckdb.DuckDBPyConnection at 0x1f0a4406870>

In [None]:
# Remove the scratch table. IF EXISTS makes the cell safe to re-run: a plain
# DROP TABLE raises once the table is already gone.
con.execute("DROP TABLE IF EXISTS ChicagoParkingTickets_DELETEME")
# Note that there is a .wal file now

<duckdb.duckdb.DuckDBPyConnection at 0x1f0a4406870>

In [None]:
# Run VACUUM on the open database.
con.execute("VACUUM")
# Observation: the size of the database file does not change.

<duckdb.duckdb.DuckDBPyConnection at 0x1f0a4406870>

In [None]:
# Run CHECKPOINT on the open database.
con.execute("CHECKPOINT")
# Observation: the size of the database file does not change.

<duckdb.duckdb.DuckDBPyConnection at 0x1f0a4406870>

In [None]:
# Make sure the database exists: open a connection to it and close it again
# straight away.
with duckdb.connect("../duckdbdata/chicago2.db") as con2:
    pass

In [64]:
# Close the connection held in `con`; any later use of `con` requires reconnecting.
con.close()

In [None]:
# On the default connection, attach both database files and copy everything
# from db1 (chicago.db) into db2 (chicago2.db).
copy_sql = """
    ATTACH '../duckdbdata/chicago2.db' AS db2;
    ATTACH '../duckdbdata/chicago.db' AS db1;
    COPY FROM DATABASE db1 to db2;
"""
duckdb.execute(copy_sql)
# Note that chicago2 file size is now smaller

<duckdb.duckdb.DuckDBPyConnection at 0x1f0a47a28f0>

In [None]:
# TODO: Demo on indexing techniques

# TODO: Come up with specific extensions to install
# TODO: Demo on installing extensions

# TODO: SET, PRAGMA, and secrets

# TODO: Demo EXPLAIN plans

ConnectionException: Connection Error: Connection already closed!