# Library Import

In [1]:
import duckdb
import pandas as pd

# Installing Extensions

In [2]:
con = duckdb.connect("data_export.db")

In [3]:
con.install_extension("httpfs")
con.load_extension("httpfs")

In [4]:
con.install_extension("spatial")
con.load_extension("spatial")

# Sample Data

In [5]:
con.sql(
    """
        CREATE TABLE IF NOT EXISTS cities AS
        SELECT * EXCLUDE geometry, ST_GeomFromWKB(geometry)
        AS geometry FROM 'https://open.gishub.org/data/duckdb/cities.parquet'
    """
)

In [6]:
con.table("cities").show()

┌─────────┬────────┬───────────┬───────────┬──────────────────┬────────────┬─────────────────────────────┐
│ country │   id   │ latitude  │ longitude │       name       │ population │          geometry           │
│ varchar │ double │  double   │  double   │     varchar      │   double   │          geometry           │
├─────────┼────────┼───────────┼───────────┼──────────────────┼────────────┼─────────────────────────────┤
│ UGA     │    1.0 │    0.5833 │   32.5333 │ Bombo            │    75000.0 │ POINT (32.5333 0.5833)      │
│ UGA     │    2.0 │     0.671 │    30.275 │ Fort Portal      │    42670.0 │ POINT (30.275 0.671)        │
│ ITA     │    3.0 │    40.642 │    15.799 │ Potenza          │    69060.0 │ POINT (15.799 40.642)       │
│ ITA     │    4.0 │    41.563 │    14.656 │ Campobasso       │    50762.0 │ POINT (14.656 41.563)       │
│ ITA     │    5.0 │    45.737 │     7.315 │ Aosta            │    34062.0 │ POINT (7.315 45.737)        │
│ ALD     │    6.0 │    60.097 │    1

# To DataFrames

In [7]:
con.table("cities").df()

Unnamed: 0,country,id,latitude,longitude,name,population,geometry
0,UGA,1.0,0.58330,32.53330,Bombo,75000.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
1,UGA,2.0,0.67100,30.27500,Fort Portal,42670.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
2,ITA,3.0,40.64200,15.79900,Potenza,69060.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
3,ITA,4.0,41.56300,14.65600,Campobasso,50762.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
4,ITA,5.0,45.73700,7.31500,Aosta,34062.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
...,...,...,...,...,...,...,...
1244,BRA,1245.0,-22.92502,-43.22502,Rio de Janeiro,11748000.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
1245,BRA,1246.0,-23.55868,-46.62502,Sao Paulo,18845000.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
1246,AUS,1247.0,-33.92001,151.18518,Sydney,4630000.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
1247,SGP,1248.0,1.29303,103.85582,Singapore,5183700.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."


# To CSV

In [9]:
con.sql("COPY cities TO './out/cities.csv' (HEADER, DELIMITER ',')")

In [10]:
con.sql(
    """
    COPY (SELECT * FROM cities WHERE country='USA')
    TO './out/cities_us.csv' (HEADER, DELIMITER ',')"""
)

# To JSON

In [11]:
con.sql("COPY cities TO './out/cities.json'")

In [12]:
con.sql("COPY (SELECT * FROM cities WHERE country='USA') TO './out/cities_us.json'")

# To Excel

In [13]:
con.sql(
    "COPY (SELECT * EXCLUDE geometry FROM cities) TO './out/cities.xlsx' WITH (FORMAT GDAL, DRIVER 'XLSX')"
)

# To Parquet

In [14]:
con.sql("COPY cities TO './out/cities.parquet' (FORMAT PARQUET)")

In [15]:
con.sql(
    "COPY (SELECT * FROM cities WHERE country='USA') TO './out/cities_us.parquet' (FORMAT PARQUET)"
)

# To GeoJSON

In [16]:
con.sql("COPY cities TO './out/cities.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON')")

In [17]:
con.sql(
    "COPY (SELECT * FROM cities WHERE country='USA') TO './out/cities_us.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON')"
)

# To Shapefile

In [18]:
con.sql("COPY cities TO './out/cities.shp' WITH (FORMAT GDAL, DRIVER 'ESRI Shapefile')")

# To GeoPackage

In [19]:
con.sql("COPY cities TO './out/cities.gpkg' WITH (FORMAT GDAL, DRIVER 'GPKG')")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))