# Library Import

In [1]:
from pathlib import Path

import duckdb
import geopandas as gpd
import leafmap

# Sample Data

In [None]:
# Fetch the zipped sample database; unzip=True asks leafmap to extract the
# archive after downloading. The connection cell below opens `nyc_data.db`
# by relative path, so the extracted file is expected in the working directory.
url = "https://open.gishub.org/data/duckdb/nyc_data.db.zip"
leafmap.download_file(url, unzip=True)

# Connecting to DuckDB

In [3]:
# Open the sample database file; `con` is the connection used by every
# query in the rest of the notebook.
con = duckdb.connect("nyc_data.db")

In [4]:
# Install and then load DuckDB's `spatial` extension on this connection.
# The GEOMETRY column type and the ST_* functions used below come from it.
con.install_extension("spatial")
con.load_extension("spatial")

In [5]:
# List the tables that ship with the sample database.
con.sql("SHOW TABLES;")

┌─────────────────────┐
│        name         │
│       varchar       │
├─────────────────────┤
│ nyc_census_blocks   │
│ nyc_homicides       │
│ nyc_neighborhoods   │
│ nyc_streets         │
│ nyc_subway_stations │
└─────────────────────┘

# Creating samples

In [6]:
# Build a small `samples` table holding one geometry of each kind (point,
# linestring, polygon, polygon with a hole, geometry collection), each parsed
# from WKT with ST_GeomFromText. CREATE OR REPLACE rebuilds the table on every
# run, so re-executing this cell does not accumulate duplicate rows.
# The trailing SELECT is the statement whose result is displayed.
con.sql(
    """
        CREATE OR REPLACE TABLE samples (
            name    VARCHAR,
            geom    GEOMETRY
        );

        INSERT INTO samples VALUES
            ('Point', ST_GeomFromText('POINT(-100 40)')),
            ('Linestring', ST_GeomFromText('LINESTRING(0 0, 1 1, 2 1, 2 2)')),
            ('Polygon', ST_GeomFromText('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))')),
            ('PolygonWithHole', ST_GeomFromText('POLYGON((0 0, 10 0, 10 10, 0 10, 0 0),(1 1, 1 2, 2 2, 2 1, 1 1))')),
            ('Collection', ST_GeomFromText('GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0, 1 0, 1 1, 0 1, 0 0)))'));

        SELECT * FROM samples;
    """
)

┌─────────────────┬───────────────────────────────────────────────────────────────────────┐
│      name       │                                 geom                                  │
│     varchar     │                               geometry                                │
├─────────────────┼───────────────────────────────────────────────────────────────────────┤
│ Point           │ POINT (-100 40)                                                       │
│ Linestring      │ LINESTRING (0 0, 1 1, 2 1, 2 2)                                       │
│ Polygon         │ POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))                                   │
│ PolygonWithHole │ POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))    │
│ Collection      │ GEOMETRYCOLLECTION (POINT (2 0), POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))) │
└─────────────────┴───────────────────────────────────────────────────────────────────────┘

In [7]:
# ST_AsText converts each geometry to its WKT string, so the `geometry`
# column in this result is VARCHAR rather than GEOMETRY.
con.sql("SELECT name, ST_AsText(geom) AS geometry FROM samples;")

┌─────────────────┬───────────────────────────────────────────────────────────────────────┐
│      name       │                               geometry                                │
│     varchar     │                                varchar                                │
├─────────────────┼───────────────────────────────────────────────────────────────────────┤
│ Point           │ POINT (-100 40)                                                       │
│ Linestring      │ LINESTRING (0 0, 1 1, 2 1, 2 2)                                       │
│ Polygon         │ POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))                                   │
│ PolygonWithHole │ POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))    │
│ Collection      │ GEOMETRYCOLLECTION (POINT (2 0), POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))) │
└─────────────────┴───────────────────────────────────────────────────────────────────────┘

In [8]:
# Export the `samples` table to GeoJSON through the spatial extension's GDAL
# writer.
#
# Nothing else in the notebook creates `./out`, so on a fresh checkout the
# export would have nowhere to write; create the directory first. Any file
# left by a previous run is removed as well, so re-running the cell always
# starts from a clean output regardless of how the GDAL driver treats an
# existing file.
samples_geojson = Path("./out/samples.geojson")  # keep in sync with the COPY target below
samples_geojson.parent.mkdir(parents=True, exist_ok=True)
samples_geojson.unlink(missing_ok=True)

con.sql(
    """
        COPY samples TO './out/samples.geojson' (FORMAT GDAL, DRIVER GeoJSON);
    """
)

In [None]:
# Read the exported GeoJSON back with GeoPandas (`gpd` is imported in the
# notebook's import cell at the top) and preview the sample geometries on an
# interactive map.
gdf = gpd.read_file("./out/samples.geojson")
gdf.explore()

# Points

In [10]:
# Show the sample point as WKT.
con.sql(
    """
        SELECT ST_AsText(geom)
        FROM samples
        WHERE name = 'Point';
    """
)

┌─────────────────┐
│ st_astext(geom) │
│     varchar     │
├─────────────────┤
│ POINT (-100 40) │
└─────────────────┘

- `ST_X(geom)` returns the X ordinate
- `ST_Y(geom)` returns the Y ordinate

In [11]:
# Extract the X and Y ordinates of the sample point as plain doubles.
con.sql(
    """
        SELECT ST_X(geom), ST_Y(geom)
        FROM samples
        WHERE name = 'Point';
    """
)

┌────────────┬────────────┐
│ st_x(geom) │ st_y(geom) │
│   double   │   double   │
├────────────┼────────────┤
│     -100.0 │       40.0 │
└────────────┴────────────┘

In [12]:
# Inspect the raw subway stations table: attribute columns plus a `geom`
# column holding point geometries.
con.sql(
    """
        SELECT * FROM nyc_subway_stations
    """
)

┌──────────┬────────┬───────────────────┬───┬──────────────┬─────────┬─────────┬──────────────────────┐
│ OBJECTID │   ID   │       NAME        │ … │    COLOR     │ EXPRESS │ CLOSED  │         geom         │
│  double  │ double │      varchar      │   │   varchar    │ varchar │ varchar │       geometry       │
├──────────┼────────┼───────────────────┼───┼──────────────┼─────────┼─────────┼──────────────────────┤
│      1.0 │  376.0 │ Cortlandt St      │ … │ YELLOW       │ NULL    │ NULL    │ POINT (583521.8544…  │
│      2.0 │    2.0 │ Rector St         │ … │ RED          │ NULL    │ NULL    │ POINT (583324.4866…  │
│      3.0 │    1.0 │ South Ferry       │ … │ RED          │ NULL    │ NULL    │ POINT (583304.1823…  │
│      4.0 │  125.0 │ 138th St          │ … │ GREEN        │ NULL    │ NULL    │ POINT (590250.1059…  │
│      5.0 │  126.0 │ 149th St          │ … │ GREEN        │ express │ NULL    │ POINT (590454.7399…  │
│      6.0 │   45.0 │ 149th St          │ … │ RED-GREEN    │ exp

In [13]:
# Show the first 10 station names with their point geometry rendered as WKT.
con.sql(
    """
        SELECT name, ST_AsText(geom)
        FROM nyc_subway_stations
        LIMIT 10;
    """
)

┌──────────────┬─────────────────────────────────────────────┐
│     NAME     │               st_astext(geom)               │
│   varchar    │                   varchar                   │
├──────────────┼─────────────────────────────────────────────┤
│ Cortlandt St │ POINT (583521.854408956 4507077.862599085)  │
│ Rector St    │ POINT (583324.4866324601 4506805.373160211) │
│ South Ferry  │ POINT (583304.1823994748 4506069.654048115) │
│ 138th St     │ POINT (590250.10594797 4518558.019924332)   │
│ 149th St     │ POINT (590454.7399891173 4519145.719617855) │
│ 149th St     │ POINT (590465.8934191109 4519168.697483203) │
│ 161st St     │ POINT (590573.169495527 4520214.766177284)  │
│ 167th St     │ POINT (591252.8314104103 4520950.353355553) │
│ 167th St     │ POINT (590946.3972262995 4521077.318976877) │
│ 170th St     │ POINT (591583.6111452815 4521434.846626811) │
├──────────────┴─────────────────────────────────────────────┤
│ 10 rows                                          2 columns │
└────────────────────────────────────────────────────────────┘

# Linestrings

In [14]:
# Show the sample linestring as WKT.
con.sql(
    """
        SELECT ST_AsText(geom)
        FROM samples
        WHERE name = 'Linestring';
    """
)

┌─────────────────────────────────┐
│         st_astext(geom)         │
│             varchar             │
├─────────────────────────────────┤
│ LINESTRING (0 0, 1 1, 2 1, 2 2) │
└─────────────────────────────────┘

- `ST_Length(geom)` returns the length of the linestring
- `ST_StartPoint(geom)` returns the first coordinate as a point
- `ST_EndPoint(geom)` returns the last coordinate as a point
- `ST_NPoints(geom)` returns the number of coordinates in the linestring

In [15]:
# Measure the sample linestring: total length, first and last vertex (returned
# as point geometries), and the number of vertices.
con.sql(
    """
        SELECT ST_Length(geom), ST_StartPoint(geom),
               ST_EndPoint(geom), ST_NPoints(geom)
        FROM samples
        WHERE name = 'Linestring';
    """
)

┌───────────────────┬─────────────────────┬───────────────────┬──────────────────┐
│  st_length(geom)  │ st_startpoint(geom) │ st_endpoint(geom) │ st_npoints(geom) │
│      double       │      geometry       │     geometry      │      uint32      │
├───────────────────┼─────────────────────┼───────────────────┼──────────────────┤
│ 3.414213562373095 │ POINT (0 0)         │ POINT (2 2)       │                4 │
└───────────────────┴─────────────────────┴───────────────────┴──────────────────┘

# Polygons

In [19]:
# The LIKE 'Polygon%' prefix match selects both the 'Polygon' and the
# 'PolygonWithHole' sample rows.
con.sql(
    """
        SELECT ST_AsText(geom)
        FROM samples
        WHERE name LIKE 'Polygon%';
    """
)

┌────────────────────────────────────────────────────────────────────┐
│                          st_astext(geom)                           │
│                              varchar                               │
├────────────────────────────────────────────────────────────────────┤
│ POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))                                │
│ POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1)) │
└────────────────────────────────────────────────────────────────────┘

- `ST_Area(geom)` returns the area of the polygons
- `ST_NRings(geom)` returns the number of rings (usually 1, more if there are holes)
- `ST_ExteriorRing(geom)` returns the outer ring as a linestring
- `ST_InteriorRingN(geometry, n)` returns a specified interior ring as a linestring
- `ST_Perimeter(geom)` returns the length of all the rings

In [28]:
# Area of both sample polygons. For the polygon with a hole, the 1x1 interior
# ring is subtracted from the 10x10 exterior ring (100 - 1 = 99).
con.sql(
    """
        SELECT name, ST_Area(geom)
        FROM samples
        WHERE name LIKE 'Polygon%';
    """
)

┌─────────────────┬───────────────┐
│      name       │ st_area(geom) │
│     varchar     │    double     │
├─────────────────┼───────────────┤
│ Polygon         │           1.0 │
│ PolygonWithHole │          99.0 │
└─────────────────┴───────────────┘

# Collections

- `MultiPoint`, a collection of points
- `MultiLineString`, a collection of linestrings
- `MultiPolygon`, a collection of polygons
- `GeometryCollection`, a heterogeneous collection of any geometry (including other collections)

In [29]:
# Show the sample geometry collection (a point plus a polygon) as WKT.
con.sql(
    """
        SELECT name, ST_AsText(geom)
        FROM samples
        WHERE name = 'Collection';
    """
)

┌────────────┬───────────────────────────────────────────────────────────────────────┐
│    name    │                            st_astext(geom)                            │
│  varchar   │                                varchar                                │
├────────────┼───────────────────────────────────────────────────────────────────────┤
│ Collection │ GEOMETRYCOLLECTION (POINT (2 0), POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))) │
└────────────┴───────────────────────────────────────────────────────────────────────┘

# Data Visualization

In [30]:
# List the tables again; the `samples` table created earlier now appears
# alongside the original NYC tables.
con.sql("SHOW TABLES;")

┌─────────────────────┐
│        name         │
│       varchar       │
├─────────────────────┤
│ nyc_census_blocks   │
│ nyc_homicides       │
│ nyc_neighborhoods   │
│ nyc_streets         │
│ nyc_subway_stations │
│ samples             │
└─────────────────────┘

In [31]:
# Pull the subway stations into a pandas DataFrame, replacing the native
# `geom` column with a WKT text column named `geometry`.
subway_stations_df = con.sql(
    "SELECT * EXCLUDE geom, ST_AsText(geom) as geometry FROM nyc_subway_stations"
).df()
subway_stations_df.head()

Unnamed: 0,OBJECTID,ID,NAME,ALT_NAME,CROSS_ST,LONG_NAME,LABEL,BOROUGH,NGHBHD,ROUTES,TRANSFERS,COLOR,EXPRESS,CLOSED,geometry
0,1.0,376.0,Cortlandt St,,Church St,"Cortlandt St (R,W) Manhattan","Cortlandt St (R,W)",Manhattan,,"R,W","R,W",YELLOW,,,POINT (583521.854408956 4507077.862599085)
1,2.0,2.0,Rector St,,,Rector St (1) Manhattan,Rector St (1),Manhattan,,1,1,RED,,,POINT (583324.4866324601 4506805.373160211)
2,3.0,1.0,South Ferry,,,South Ferry (1) Manhattan,South Ferry (1),Manhattan,,1,1,RED,,,POINT (583304.1823994748 4506069.654048115)
3,4.0,125.0,138th St,Grand Concourse,Grand Concourse,"138th St / Grand Concourse (4,5) Bronx","138th St / Grand Concourse (4,5)",Bronx,,45,45,GREEN,,,POINT (590250.10594797 4518558.019924332)
4,5.0,126.0,149th St,Grand Concourse,Grand Concourse,149th St / Grand Concourse (4) Bronx,149th St / Grand Concourse (4),Bronx,,4,245,GREEN,express,,POINT (590454.7399891173 4519145.719617855)


In [32]:
# Turn the WKT `geometry` column into a GeoDataFrame and reproject it from
# EPSG:26918 (the CRS of the source data) to EPSG:4326 longitude/latitude
# so the stations can be drawn on a web map.
subway_stations_gdf = leafmap.df_to_gdf(
    subway_stations_df,
    src_crs="EPSG:26918",
    dst_crs="EPSG:4326",
)
subway_stations_gdf.head()

Unnamed: 0,OBJECTID,ID,NAME,ALT_NAME,CROSS_ST,LONG_NAME,LABEL,BOROUGH,NGHBHD,ROUTES,TRANSFERS,COLOR,EXPRESS,CLOSED,geometry
0,1.0,376.0,Cortlandt St,,Church St,"Cortlandt St (R,W) Manhattan","Cortlandt St (R,W)",Manhattan,,"R,W","R,W",YELLOW,,,POINT (-74.01122 40.71038)
1,2.0,2.0,Rector St,,,Rector St (1) Manhattan,Rector St (1),Manhattan,,1,1,RED,,,POINT (-74.01359 40.70795)
2,3.0,1.0,South Ferry,,,South Ferry (1) Manhattan,South Ferry (1),Manhattan,,1,1,RED,,,POINT (-74.01393 40.70132)
3,4.0,125.0,138th St,Grand Concourse,Grand Concourse,"138th St / Grand Concourse (4,5) Bronx","138th St / Grand Concourse (4,5)",Bronx,,45,45,GREEN,,,POINT (-73.92992 40.81308)
4,5.0,126.0,149th St,Grand Concourse,Grand Concourse,149th St / Grand Concourse (4) Bronx,149th St / Grand Concourse (4),Bronx,,4,245,GREEN,express,,POINT (-73.92741 40.81835)


In [None]:
# Preview the reprojected subway stations on an interactive map.
subway_stations_gdf.explore()

In [35]:
# Pull the streets into a pandas DataFrame, replacing the native `geom`
# column with a WKT text column named `geometry`.
nyc_streets_df = con.sql(
    "SELECT * EXCLUDE geom, ST_AsText(geom) as geometry FROM nyc_streets"
).df()
nyc_streets_df.head()

Unnamed: 0,ID,NAME,ONEWAY,TYPE,geometry
0,1,Shore Pky S,,residential,MULTILINESTRING ((586785.4767897038 4492901.00...
1,2,,,footway,MULTILINESTRING ((586645.0073625665 4504977.75...
2,3,Avenue O,,residential,MULTILINESTRING ((586750.3019977848 4496109.72...
3,4,Walsh Ct,,residential,MULTILINESTRING ((586728.695515043 4497971.053...
4,5,,,motorway_link,MULTILINESTRING ((586587.0531467082 4510088.25...


In [36]:
# Turn the WKT `geometry` column into a GeoDataFrame and reproject it from
# EPSG:26918 (the CRS of the source data) to EPSG:4326 longitude/latitude
# so the streets can be drawn on a web map.
nyc_streets_gdf = leafmap.df_to_gdf(
    nyc_streets_df,
    src_crs="EPSG:26918",
    dst_crs="EPSG:4326",
)
nyc_streets_gdf.head()

Unnamed: 0,ID,NAME,ONEWAY,TYPE,geometry
0,1,Shore Pky S,,residential,"MULTILINESTRING ((-73.97454 40.58235, -73.9732..."
1,2,,,footway,"MULTILINESTRING ((-73.97454 40.69114, -73.9743..."
2,3,Avenue O,,residential,"MULTILINESTRING ((-73.97452 40.61126, -73.9734..."
3,4,Walsh Ct,,residential,"MULTILINESTRING ((-73.97451 40.62802, -73.9726..."
4,5,,,motorway_link,"MULTILINESTRING ((-73.97452 40.73718, -73.9738..."


In [None]:
# Preview the reprojected street network on an interactive map.
nyc_streets_gdf.explore()